diff --git a/.github/workflows/conformance.yml b/.github/workflows/conformance.yml
index c0a7795a3..c7962c93d 100644
--- a/.github/workflows/conformance.yml
+++ b/.github/workflows/conformance.yml
@@ -13,11 +13,8 @@ on:
branches: [ main ]
types: [opened, synchronize, reopened]
paths:
- - 'llama_stack/**'
- - '!llama_stack/ui/**'
- - 'tests/**'
- - 'uv.lock'
- - 'pyproject.toml'
+ - 'docs/_static/llama-stack-spec.yaml'
+ - 'docs/_static/llama-stack-spec.html'
- '.github/workflows/conformance.yml' # This workflow itself
concurrency:
@@ -43,10 +40,27 @@ jobs:
ref: ${{ github.event.pull_request.base.ref }}
path: 'base'
+ # Cache oasdiff to avoid checksum failures and speed up builds
+ - name: Cache oasdiff
+ id: cache-oasdiff
+ uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809
+ with:
+ path: ~/oasdiff
+ key: oasdiff-${{ runner.os }}
+
# Install oasdiff: https://github.com/oasdiff/oasdiff, a tool for detecting breaking changes in OpenAPI specs.
- name: Install oasdiff
+ if: steps.cache-oasdiff.outputs.cache-hit != 'true'
run: |
curl -fsSL https://raw.githubusercontent.com/oasdiff/oasdiff/main/install.sh | sh
+ cp /usr/local/bin/oasdiff ~/oasdiff
+
+ # Setup cached oasdiff
+ - name: Setup cached oasdiff
+ if: steps.cache-oasdiff.outputs.cache-hit == 'true'
+ run: |
+ sudo cp ~/oasdiff /usr/local/bin/oasdiff
+ sudo chmod +x /usr/local/bin/oasdiff
# Run oasdiff to detect breaking changes in the API specification
# This step will fail if incompatible changes are detected, preventing breaking changes from being merged
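For context, the gate described by these comments reduces to a single oasdiff invocation against the two checkouts. A minimal sketch, assuming oasdiff's documented `breaking` subcommand and `--fail-on ERR` flag, and the `base/` checkout path set up earlier in this workflow:

```python
# Hedged illustration of the conformance gate: compare the base branch's spec
# against the PR's spec and fail on breaking changes. Assumes oasdiff's
# `breaking` subcommand and `--fail-on ERR` flag behave as documented upstream.
import subprocess
import sys

result = subprocess.run(
    [
        "oasdiff", "breaking",
        "base/docs/_static/llama-stack-spec.yaml",  # checked out at the PR base ref
        "docs/_static/llama-stack-spec.yaml",       # the PR's spec
        "--fail-on", "ERR",
    ]
)
sys.exit(result.returncode)  # non-zero when incompatible changes are detected
```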
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index 000208043..b5845be53 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -47,11 +47,21 @@ jobs:
run: npm ci
working-directory: llama_stack/ui
- - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
+ - name: Run pre-commit
+ id: precommit
+ uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
+ continue-on-error: true
env:
SKIP: no-commit-to-branch
RUFF_OUTPUT_FORMAT: github
+ - name: Check pre-commit results
+ if: steps.precommit.outcome == 'failure'
+ run: |
+ echo "::error::Pre-commit hooks failed. Please run 'pre-commit run --all-files' locally and commit the fixes."
+ echo "::warning::Some pre-commit hooks failed. Check the output above for details."
+ exit 1
+
- name: Debug
run: |
echo "github.ref: ${{ github.ref }}"
@@ -79,17 +89,23 @@ jobs:
echo "No changes to commit"
fi
- - name: Verify if there are any diff files after pre-commit
+ - name: Verify no uncommitted changes
if: github.actor != 'dependabot[bot]'
run: |
- git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)
+ if ! git diff --exit-code; then
+ echo "::error::There are uncommitted changes after pre-commit. Please run 'pre-commit run --all-files' locally and commit the fixes."
+ echo "::warning::Files with changes:"
+ git diff --name-status
+ exit 1
+ fi
- name: Verify if there are any new files after pre-commit
if: github.actor != 'dependabot[bot]'
run: |
unstaged_files=$(git ls-files --others --exclude-standard)
if [ -n "$unstaged_files" ]; then
- echo "There are uncommitted new files, run pre-commit locally and commit again"
+ echo "::error::There are new untracked files after pre-commit. Please run 'pre-commit run --all-files' locally and commit the fixes."
+ echo "::warning::New files:"
echo "$unstaged_files"
exit 1
fi
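The `::error::`/`::warning::` strings in the steps above are GitHub Actions workflow commands; the runner turns them into annotations on the run (and on the PR diff when `file`/`line` parameters are supplied). A small sketch of emitting them from a script, with the optional parameters included for reference:

```python
# Emitting GitHub Actions annotations from a script. This uses the same
# ::error::/::warning:: workflow-command syntax as the echo lines above;
# the file/line parameters are optional. Paths here are illustrative.
def annotate(level: str, message: str, file: str | None = None, line: int | None = None) -> None:
    params = ",".join(f"{k}={v}" for k, v in (("file", file), ("line", line)) if v is not None)
    print(f"::{level} {params}::{message}" if params else f"::{level}::{message}")

annotate("error", "Pre-commit hooks failed; run 'pre-commit run --all-files' locally.")
annotate("warning", "Formatting drift detected", file="llama_stack/ui/app.ts", line=12)
```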
diff --git a/docs/source/distributions/k8s-benchmark/README.md b/benchmarking/k8s-benchmark/README.md
similarity index 98%
rename from docs/source/distributions/k8s-benchmark/README.md
rename to benchmarking/k8s-benchmark/README.md
index 42da4d466..3b0d0c4db 100644
--- a/docs/source/distributions/k8s-benchmark/README.md
+++ b/benchmarking/k8s-benchmark/README.md
@@ -34,13 +34,12 @@ This data enables data-driven architectural decisions and performance optimizati
**1. Deploy base k8s infrastructure:**
```bash
-cd ../k8s
+cd ../../docs/source/distributions/k8s
./apply.sh
```
**2. Deploy benchmark components:**
```bash
-cd ../k8s-benchmark
./apply.sh
```
@@ -56,7 +55,6 @@ kubectl get pods
**Benchmark Llama Stack (default):**
```bash
-cd docs/source/distributions/k8s-benchmark/
./run-benchmark.sh
```
diff --git a/docs/source/distributions/k8s-benchmark/apply.sh b/benchmarking/k8s-benchmark/apply.sh
similarity index 100%
rename from docs/source/distributions/k8s-benchmark/apply.sh
rename to benchmarking/k8s-benchmark/apply.sh
diff --git a/docs/source/distributions/k8s-benchmark/benchmark.py b/benchmarking/k8s-benchmark/benchmark.py
similarity index 80%
rename from docs/source/distributions/k8s-benchmark/benchmark.py
rename to benchmarking/k8s-benchmark/benchmark.py
index 3d0d18150..d5e34aa23 100644
--- a/docs/source/distributions/k8s-benchmark/benchmark.py
+++ b/benchmarking/k8s-benchmark/benchmark.py
@@ -14,7 +14,7 @@ import os
import random
import statistics
import time
-from typing import Tuple
+
import aiohttp
@@ -55,10 +55,50 @@ class BenchmarkStats:
total_time = self.end_time - self.start_time
success_rate = (self.success_count / self.total_requests) * 100
-
- print(f"\n{'='*60}")
- print(f"BENCHMARK RESULTS")
- print(f"{'='*60}")
+
+ print(f"\n{'=' * 60}")
+ print("BENCHMARK RESULTS")
+
+ print("\nResponse Time Statistics:")
+ print(f" Mean: {statistics.mean(self.response_times):.3f}s")
+ print(f" Median: {statistics.median(self.response_times):.3f}s")
+ print(f" Min: {min(self.response_times):.3f}s")
+ print(f" Max: {max(self.response_times):.3f}s")
+
+ if len(self.response_times) > 1:
+ print(f" Std Dev: {statistics.stdev(self.response_times):.3f}s")
+
+ percentiles = [50, 90, 95, 99]
+ sorted_times = sorted(self.response_times)
+ print("\nPercentiles:")
+ for p in percentiles:
+ idx = int(len(sorted_times) * p / 100) - 1
+ idx = max(0, min(idx, len(sorted_times) - 1))
+ print(f" P{p}: {sorted_times[idx]:.3f}s")
+
+ if self.ttft_times:
+ print("\nTime to First Token (TTFT) Statistics:")
+ print(f" Mean: {statistics.mean(self.ttft_times):.3f}s")
+ print(f" Median: {statistics.median(self.ttft_times):.3f}s")
+ print(f" Min: {min(self.ttft_times):.3f}s")
+ print(f" Max: {max(self.ttft_times):.3f}s")
+
+ if len(self.ttft_times) > 1:
+ print(f" Std Dev: {statistics.stdev(self.ttft_times):.3f}s")
+
+ sorted_ttft = sorted(self.ttft_times)
+ print("\nTTFT Percentiles:")
+ for p in percentiles:
+ idx = int(len(sorted_ttft) * p / 100) - 1
+ idx = max(0, min(idx, len(sorted_ttft) - 1))
+ print(f" P{p}: {sorted_ttft[idx]:.3f}s")
+
+ if self.chunks_received:
+ print("\nStreaming Statistics:")
+ print(f" Mean chunks per response: {statistics.mean(self.chunks_received):.1f}")
+ print(f" Total chunks received: {sum(self.chunks_received)}")
+
+ print(f"{'=' * 60}")
print(f"Total time: {total_time:.2f}s")
print(f"Concurrent users: {self.concurrent_users}")
print(f"Total requests: {self.total_requests}")
@@ -66,55 +106,16 @@ class BenchmarkStats:
print(f"Failed requests: {len(self.errors)}")
print(f"Success rate: {success_rate:.1f}%")
print(f"Requests per second: {self.success_count / total_time:.2f}")
-
- print(f"\nResponse Time Statistics:")
- print(f" Mean: {statistics.mean(self.response_times):.3f}s")
- print(f" Median: {statistics.median(self.response_times):.3f}s")
- print(f" Min: {min(self.response_times):.3f}s")
- print(f" Max: {max(self.response_times):.3f}s")
-
- if len(self.response_times) > 1:
- print(f" Std Dev: {statistics.stdev(self.response_times):.3f}s")
-
- percentiles = [50, 90, 95, 99]
- sorted_times = sorted(self.response_times)
- print(f"\nPercentiles:")
- for p in percentiles:
- idx = int(len(sorted_times) * p / 100) - 1
- idx = max(0, min(idx, len(sorted_times) - 1))
- print(f" P{p}: {sorted_times[idx]:.3f}s")
-
- if self.ttft_times:
- print(f"\nTime to First Token (TTFT) Statistics:")
- print(f" Mean: {statistics.mean(self.ttft_times):.3f}s")
- print(f" Median: {statistics.median(self.ttft_times):.3f}s")
- print(f" Min: {min(self.ttft_times):.3f}s")
- print(f" Max: {max(self.ttft_times):.3f}s")
-
- if len(self.ttft_times) > 1:
- print(f" Std Dev: {statistics.stdev(self.ttft_times):.3f}s")
-
- sorted_ttft = sorted(self.ttft_times)
- print(f"\nTTFT Percentiles:")
- for p in percentiles:
- idx = int(len(sorted_ttft) * p / 100) - 1
- idx = max(0, min(idx, len(sorted_ttft) - 1))
- print(f" P{p}: {sorted_ttft[idx]:.3f}s")
-
- if self.chunks_received:
- print(f"\nStreaming Statistics:")
- print(f" Mean chunks per response: {statistics.mean(self.chunks_received):.1f}")
- print(f" Total chunks received: {sum(self.chunks_received)}")
-
+
if self.errors:
- print(f"\nErrors (showing first 5):")
+ print("\nErrors (showing first 5):")
for error in self.errors[:5]:
print(f" {error}")
class LlamaStackBenchmark:
def __init__(self, base_url: str, model_id: str):
- self.base_url = base_url.rstrip('/')
+ self.base_url = base_url.rstrip("/")
self.model_id = model_id
self.headers = {"Content-Type": "application/json"}
self.test_messages = [
@@ -125,74 +126,67 @@ class LlamaStackBenchmark:
[
{"role": "user", "content": "What is machine learning?"},
{"role": "assistant", "content": "Machine learning is a subset of AI..."},
- {"role": "user", "content": "Can you give me a practical example?"}
- ]
+ {"role": "user", "content": "Can you give me a practical example?"},
+ ],
]
-
- async def make_async_streaming_request(self) -> Tuple[float, int, float | None, str | None]:
+ async def make_async_streaming_request(self) -> tuple[float, int, float | None, str | None]:
"""Make a single async streaming chat completion request."""
messages = random.choice(self.test_messages)
- payload = {
- "model": self.model_id,
- "messages": messages,
- "stream": True,
- "max_tokens": 100
- }
-
+ payload = {"model": self.model_id, "messages": messages, "stream": True, "max_tokens": 100}
+
start_time = time.time()
chunks_received = 0
ttft = None
error = None
-
+
session = aiohttp.ClientSession()
-
+
try:
async with session.post(
f"{self.base_url}/chat/completions",
headers=self.headers,
json=payload,
- timeout=aiohttp.ClientTimeout(total=30)
+ timeout=aiohttp.ClientTimeout(total=30),
) as response:
if response.status == 200:
async for line in response.content:
if line:
- line_str = line.decode('utf-8').strip()
- if line_str.startswith('data: '):
+ line_str = line.decode("utf-8").strip()
+ if line_str.startswith("data: "):
chunks_received += 1
if ttft is None:
ttft = time.time() - start_time
- if line_str == 'data: [DONE]':
+ if line_str == "data: [DONE]":
break
-
+
if chunks_received == 0:
error = "No streaming chunks received"
else:
text = await response.text()
error = f"HTTP {response.status}: {text[:100]}"
-
+
except Exception as e:
error = f"Request error: {str(e)}"
finally:
await session.close()
-
+
response_time = time.time() - start_time
return response_time, chunks_received, ttft, error
-
async def run_benchmark(self, duration: int, concurrent_users: int) -> BenchmarkStats:
"""Run benchmark using async requests for specified duration."""
stats = BenchmarkStats()
stats.concurrent_users = concurrent_users
stats.start_time = time.time()
-
+
print(f"Starting benchmark: {duration}s duration, {concurrent_users} concurrent users")
print(f"Target URL: {self.base_url}/chat/completions")
print(f"Model: {self.model_id}")
-
+
connector = aiohttp.TCPConnector(limit=concurrent_users)
- async with aiohttp.ClientSession(connector=connector) as session:
-
+ async with aiohttp.ClientSession(connector=connector):
+
async def worker(worker_id: int):
"""Worker that sends requests sequentially until canceled."""
request_count = 0
@@ -201,12 +195,12 @@ class LlamaStackBenchmark:
response_time, chunks, ttft, error = await self.make_async_streaming_request()
await stats.add_result(response_time, chunks, ttft, error)
request_count += 1
-
+
except asyncio.CancelledError:
break
except Exception as e:
await stats.add_result(0, 0, None, f"Worker {worker_id} error: {str(e)}")
-
+
# Progress reporting task
async def progress_reporter():
last_report_time = time.time()
@@ -215,48 +209,52 @@ class LlamaStackBenchmark:
await asyncio.sleep(1) # Report every second
if time.time() >= last_report_time + 10: # Report every 10 seconds
elapsed = time.time() - stats.start_time
- print(f"Completed: {stats.total_requests} requests in {elapsed:.1f}s")
+ print(
+ f"Completed: {stats.total_requests} requests in {elapsed:.1f}s, RPS: {stats.total_requests / elapsed:.1f}"
+ )
last_report_time = time.time()
except asyncio.CancelledError:
break
-
+
# Spawn concurrent workers
tasks = [asyncio.create_task(worker(i)) for i in range(concurrent_users)]
progress_task = asyncio.create_task(progress_reporter())
tasks.append(progress_task)
-
+
# Wait for duration then cancel all tasks
await asyncio.sleep(duration)
-
+
for task in tasks:
task.cancel()
-
+
# Wait for all tasks to complete
await asyncio.gather(*tasks, return_exceptions=True)
-
+
stats.end_time = time.time()
return stats
def main():
parser = argparse.ArgumentParser(description="Llama Stack Benchmark Tool")
- parser.add_argument("--base-url", default=os.getenv("BENCHMARK_BASE_URL", "http://localhost:8000/v1/openai/v1"),
- help="Base URL for the API (default: http://localhost:8000/v1/openai/v1)")
- parser.add_argument("--model", default=os.getenv("INFERENCE_MODEL", "test-model"),
- help="Model ID to use for requests")
- parser.add_argument("--duration", type=int, default=60,
- help="Duration in seconds to run benchmark (default: 60)")
- parser.add_argument("--concurrent", type=int, default=10,
- help="Number of concurrent users (default: 10)")
-
+ parser.add_argument(
+ "--base-url",
+ default=os.getenv("BENCHMARK_BASE_URL", "http://localhost:8000/v1/openai/v1"),
+ help="Base URL for the API (default: http://localhost:8000/v1/openai/v1)",
+ )
+ parser.add_argument(
+ "--model", default=os.getenv("INFERENCE_MODEL", "test-model"), help="Model ID to use for requests"
+ )
+ parser.add_argument("--duration", type=int, default=60, help="Duration in seconds to run benchmark (default: 60)")
+ parser.add_argument("--concurrent", type=int, default=10, help="Number of concurrent users (default: 10)")
+
args = parser.parse_args()
-
+
benchmark = LlamaStackBenchmark(args.base_url, args.model)
-
+
try:
stats = asyncio.run(benchmark.run_benchmark(args.duration, args.concurrent))
stats.print_summary()
-
+
except KeyboardInterrupt:
print("\nBenchmark interrupted by user")
except Exception as e:
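As an aside, the nearest-rank percentile indexing that `print_summary` uses (`idx = int(n * p / 100) - 1`, clamped to the valid range) is easy to sanity-check standalone:

```python
# Standalone check of the nearest-rank percentile indexing from print_summary.
response_times = [0.12, 0.15, 0.21, 0.30, 0.45, 0.52, 0.61, 0.75, 0.90, 1.20]
sorted_times = sorted(response_times)

for p in [50, 90, 95, 99]:
    idx = int(len(sorted_times) * p / 100) - 1
    idx = max(0, min(idx, len(sorted_times) - 1))  # clamp to a valid index
    print(f"P{p}: {sorted_times[idx]:.3f}s")
# For this 10-sample list: P50=0.450s, P90=0.900s, P95=0.900s, P99=0.900s
```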
diff --git a/docs/source/distributions/k8s-benchmark/openai-mock-server.py b/benchmarking/k8s-benchmark/openai-mock-server.py
similarity index 60%
rename from docs/source/distributions/k8s-benchmark/openai-mock-server.py
rename to benchmarking/k8s-benchmark/openai-mock-server.py
index de0680842..9e898af8e 100755
--- a/docs/source/distributions/k8s-benchmark/openai-mock-server.py
+++ b/benchmarking/k8s-benchmark/openai-mock-server.py
@@ -11,180 +11,192 @@ OpenAI-compatible mock server that returns:
- Valid OpenAI-formatted chat completion responses with dynamic content
"""
-from flask import Flask, request, jsonify, Response
-import time
-import random
-import uuid
-import json
import argparse
+import json
import os
+import random
+import time
+import uuid
+
+from flask import Flask, Response, jsonify, request
app = Flask(__name__)
+
# Models from environment variables
def get_models():
models_str = os.getenv("MOCK_MODELS", "meta-llama/Llama-3.2-3B-Instruct")
model_ids = [m.strip() for m in models_str.split(",") if m.strip()]
-
+
return {
"object": "list",
"data": [
- {
- "id": model_id,
- "object": "model",
- "created": 1234567890,
- "owned_by": "vllm"
- }
- for model_id in model_ids
- ]
+ {"id": model_id, "object": "model", "created": 1234567890, "owned_by": "vllm"} for model_id in model_ids
+ ],
}
+
def generate_random_text(length=50):
"""Generate random but coherent text for responses."""
words = [
- "Hello", "there", "I'm", "an", "AI", "assistant", "ready", "to", "help", "you",
- "with", "your", "questions", "and", "tasks", "today", "Let", "me","know", "what",
- "you'd", "like", "to", "discuss", "or", "explore", "together", "I", "can", "assist",
- "with", "various", "topics", "including", "coding", "writing", "analysis", "and", "more"
+ "Hello",
+ "there",
+ "I'm",
+ "an",
+ "AI",
+ "assistant",
+ "ready",
+ "to",
+ "help",
+ "you",
+ "with",
+ "your",
+ "questions",
+ "and",
+ "tasks",
+ "today",
+ "Let",
+ "me",
+ "know",
+ "what",
+ "you'd",
+ "like",
+ "to",
+ "discuss",
+ "or",
+ "explore",
+ "together",
+ "I",
+ "can",
+ "assist",
+ "with",
+ "various",
+ "topics",
+ "including",
+ "coding",
+ "writing",
+ "analysis",
+ "and",
+ "more",
]
return " ".join(random.choices(words, k=length))
-@app.route('/v1/models', methods=['GET'])
+
+@app.route("/v1/models", methods=["GET"])
def list_models():
models = get_models()
print(f"[MOCK] Returning models: {[m['id'] for m in models['data']]}")
return jsonify(models)
-@app.route('/v1/chat/completions', methods=['POST'])
+
+@app.route("/v1/chat/completions", methods=["POST"])
def chat_completions():
"""Return OpenAI-formatted chat completion responses."""
data = request.get_json()
- default_model = get_models()['data'][0]['id']
- model = data.get('model', default_model)
- messages = data.get('messages', [])
- stream = data.get('stream', False)
-
+ default_model = get_models()["data"][0]["id"]
+ model = data.get("model", default_model)
+ messages = data.get("messages", [])
+ stream = data.get("stream", False)
+
print(f"[MOCK] Chat completion request - model: {model}, stream: {stream}")
-
+
if stream:
return handle_streaming_completion(model, messages)
else:
return handle_non_streaming_completion(model, messages)
+
def handle_non_streaming_completion(model, messages):
response_text = generate_random_text(random.randint(20, 80))
-
+
# Calculate realistic token counts
- prompt_tokens = sum(len(str(msg.get('content', '')).split()) for msg in messages)
+ prompt_tokens = sum(len(str(msg.get("content", "")).split()) for msg in messages)
completion_tokens = len(response_text.split())
-
+
response = {
"id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
"object": "chat.completion",
"created": int(time.time()),
"model": model,
- "choices": [
- {
- "index": 0,
- "message": {
- "role": "assistant",
- "content": response_text
- },
- "finish_reason": "stop"
- }
- ],
+ "choices": [{"index": 0, "message": {"role": "assistant", "content": response_text}, "finish_reason": "stop"}],
"usage": {
"prompt_tokens": prompt_tokens,
"completion_tokens": completion_tokens,
- "total_tokens": prompt_tokens + completion_tokens
- }
+ "total_tokens": prompt_tokens + completion_tokens,
+ },
}
-
+
return jsonify(response)
+
def handle_streaming_completion(model, messages):
def generate_stream():
# Generate response text
full_response = generate_random_text(random.randint(30, 100))
words = full_response.split()
-
+
# Send initial chunk
initial_chunk = {
"id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
"object": "chat.completion.chunk",
"created": int(time.time()),
"model": model,
- "choices": [
- {
- "index": 0,
- "delta": {"role": "assistant", "content": ""}
- }
- ]
+ "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}}],
}
yield f"data: {json.dumps(initial_chunk)}\n\n"
-
+
# Send word by word
for i, word in enumerate(words):
chunk = {
"id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
- "object": "chat.completion.chunk",
+ "object": "chat.completion.chunk",
"created": int(time.time()),
"model": model,
- "choices": [
- {
- "index": 0,
- "delta": {"content": f"{word} " if i < len(words) - 1 else word}
- }
- ]
+ "choices": [{"index": 0, "delta": {"content": f"{word} " if i < len(words) - 1 else word}}],
}
yield f"data: {json.dumps(chunk)}\n\n"
# Configurable delay to simulate realistic streaming
stream_delay = float(os.getenv("STREAM_DELAY_SECONDS", "0.005"))
time.sleep(stream_delay)
-
+
# Send final chunk
final_chunk = {
"id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
"object": "chat.completion.chunk",
"created": int(time.time()),
"model": model,
- "choices": [
- {
- "index": 0,
- "delta": {"content": ""},
- "finish_reason": "stop"
- }
- ]
+ "choices": [{"index": 0, "delta": {"content": ""}, "finish_reason": "stop"}],
}
yield f"data: {json.dumps(final_chunk)}\n\n"
yield "data: [DONE]\n\n"
-
+
return Response(
generate_stream(),
- mimetype='text/event-stream',
+ mimetype="text/event-stream",
headers={
- 'Cache-Control': 'no-cache',
- 'Connection': 'keep-alive',
- 'Access-Control-Allow-Origin': '*',
- }
+ "Cache-Control": "no-cache",
+ "Connection": "keep-alive",
+ "Access-Control-Allow-Origin": "*",
+ },
)
-@app.route('/health', methods=['GET'])
+
+@app.route("/health", methods=["GET"])
def health():
return jsonify({"status": "healthy", "type": "openai-mock"})
-if __name__ == '__main__':
- parser = argparse.ArgumentParser(description='OpenAI-compatible mock server')
- parser.add_argument('--port', type=int, default=8081,
- help='Port to run the server on (default: 8081)')
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="OpenAI-compatible mock server")
+ parser.add_argument("--port", type=int, default=8081, help="Port to run the server on (default: 8081)")
args = parser.parse_args()
-
+
port = args.port
-
+
models = get_models()
print("Starting OpenAI-compatible mock server...")
print(f"- /models endpoint with: {[m['id'] for m in models['data']]}")
print("- OpenAI-formatted chat/completion responses with dynamic content")
print("- Streaming support with valid SSE format")
print(f"- Listening on: http://0.0.0.0:{port}")
- app.run(host='0.0.0.0', port=port, debug=False)
+ app.run(host="0.0.0.0", port=port, debug=False)
diff --git a/docs/source/distributions/k8s-benchmark/profile_running_server.sh b/benchmarking/k8s-benchmark/profile_running_server.sh
similarity index 100%
rename from docs/source/distributions/k8s-benchmark/profile_running_server.sh
rename to benchmarking/k8s-benchmark/profile_running_server.sh
diff --git a/docs/source/distributions/k8s-benchmark/run-benchmark.sh b/benchmarking/k8s-benchmark/run-benchmark.sh
similarity index 100%
rename from docs/source/distributions/k8s-benchmark/run-benchmark.sh
rename to benchmarking/k8s-benchmark/run-benchmark.sh
diff --git a/docs/source/distributions/k8s-benchmark/stack-configmap.yaml b/benchmarking/k8s-benchmark/stack-configmap.yaml
similarity index 100%
rename from docs/source/distributions/k8s-benchmark/stack-configmap.yaml
rename to benchmarking/k8s-benchmark/stack-configmap.yaml
diff --git a/docs/source/distributions/k8s-benchmark/stack-k8s.yaml.template b/benchmarking/k8s-benchmark/stack-k8s.yaml.template
similarity index 100%
rename from docs/source/distributions/k8s-benchmark/stack-k8s.yaml.template
rename to benchmarking/k8s-benchmark/stack-k8s.yaml.template
diff --git a/docs/source/distributions/k8s-benchmark/stack_run_config.yaml b/benchmarking/k8s-benchmark/stack_run_config.yaml
similarity index 92%
rename from docs/source/distributions/k8s-benchmark/stack_run_config.yaml
rename to benchmarking/k8s-benchmark/stack_run_config.yaml
index f8ff7811b..5a9e2ae4f 100644
--- a/docs/source/distributions/k8s-benchmark/stack_run_config.yaml
+++ b/benchmarking/k8s-benchmark/stack_run_config.yaml
@@ -2,6 +2,7 @@ version: '2'
image_name: kubernetes-benchmark-demo
apis:
- agents
+- files
- inference

- safety
@@ -20,6 +21,14 @@ providers:
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
config: {}
+ files:
+ - provider_id: meta-reference-files
+ provider_type: inline::localfs
+ config:
+ storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
+ metadata_store:
+ type: sqlite
+ db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
vector_io:
- provider_id: ${env.ENABLE_CHROMADB:+chromadb}
provider_type: remote::chromadb
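The `${env.VAR:=default}` and `${env.VAR:+value}` forms in these run configs mirror shell parameter expansion: `:=` substitutes a default when the variable is unset, while `:+` substitutes a value only when the variable *is* set (which is how `provider_id: ${env.ENABLE_CHROMADB:+chromadb}` enables a provider conditionally). A toy resolver to illustrate the semantics; this is not the stack's actual substitution code:

```python
# Toy resolver for the ${env.VAR:=default} / ${env.VAR:+value} forms above.
# Illustrates the semantics only; not the stack's real implementation.
import os
import re

def resolve(template: str) -> str:
    def repl(m: re.Match) -> str:
        var, op, arg = m.group(1), m.group(2), m.group(3)
        val = os.environ.get(var)
        if op == ":=":               # default: use arg when VAR is unset/empty
            return val if val else arg
        return arg if val else ""    # ":+" conditional: use arg only when VAR is set

    return re.sub(r"\$\{env\.(\w+)(:=|:\+)([^}]*)\}", repl, template)

os.environ.pop("ENABLE_CHROMADB", None)
print(resolve("provider_id: ${env.ENABLE_CHROMADB:+chromadb}"))  # -> "provider_id: "
os.environ["ENABLE_CHROMADB"] = "1"
print(resolve("provider_id: ${env.ENABLE_CHROMADB:+chromadb}"))  # -> "provider_id: chromadb"
```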
diff --git a/docs/_static/css/my_theme.css b/docs/_static/css/my_theme.css
index d078ec057..7dcd97c9b 100644
--- a/docs/_static/css/my_theme.css
+++ b/docs/_static/css/my_theme.css
@@ -1,5 +1,106 @@
@import url("theme.css");
+/* Horizontal Navigation Bar */
+.horizontal-nav {
+ background-color: #ffffff;
+ border-bottom: 1px solid #e5e5e5;
+ padding: 0;
+ position: fixed;
+ top: 0;
+ left: 0;
+ right: 0;
+ z-index: 1050;
+ height: 50px;
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+}
+
+[data-theme="dark"] .horizontal-nav {
+ background-color: #1a1a1a;
+ border-bottom: 1px solid #333;
+}
+
+.horizontal-nav .nav-container {
+ max-width: 1200px;
+ margin: 0 auto;
+ display: flex;
+ align-items: center;
+ justify-content: space-between;
+ padding: 0 20px;
+ height: 100%;
+}
+
+.horizontal-nav .nav-brand {
+ font-size: 18px;
+ font-weight: 600;
+ color: #333;
+ text-decoration: none;
+}
+
+[data-theme="dark"] .horizontal-nav .nav-brand {
+ color: #fff;
+}
+
+.horizontal-nav .nav-links {
+ display: flex;
+ align-items: center;
+ gap: 30px;
+ list-style: none;
+ margin: 0;
+ padding: 0;
+}
+
+.horizontal-nav .nav-links a {
+ color: #666;
+ text-decoration: none;
+ font-size: 14px;
+ font-weight: 500;
+ padding: 8px 12px;
+ border-radius: 6px;
+ transition: all 0.2s ease;
+}
+
+.horizontal-nav .nav-links a:hover,
+.horizontal-nav .nav-links a.active {
+ color: #333;
+ background-color: #f5f5f5;
+}
+
+.horizontal-nav .nav-links a.active {
+ font-weight: 600;
+}
+
+[data-theme="dark"] .horizontal-nav .nav-links a {
+ color: #ccc;
+}
+
+[data-theme="dark"] .horizontal-nav .nav-links a:hover,
+[data-theme="dark"] .horizontal-nav .nav-links a.active {
+ color: #fff;
+ background-color: #333;
+}
+
+.horizontal-nav .nav-links .github-link {
+ display: flex;
+ align-items: center;
+ gap: 6px;
+}
+
+.horizontal-nav .nav-links .github-icon {
+ width: 16px;
+ height: 16px;
+ fill: currentColor;
+}
+
+/* Adjust main content to account for fixed nav */
+.wy-nav-side {
+ top: 50px;
+ height: calc(100vh - 50px);
+}
+
+.wy-nav-content-wrap {
+ margin-top: 50px;
+}
+
.wy-nav-content {
max-width: 90%;
}
diff --git a/docs/_static/js/horizontal_nav.js b/docs/_static/js/horizontal_nav.js
new file mode 100644
index 000000000..c2384f9d5
--- /dev/null
+++ b/docs/_static/js/horizontal_nav.js
@@ -0,0 +1,44 @@
+// Horizontal Navigation Bar for Llama Stack Documentation
+document.addEventListener('DOMContentLoaded', function() {
+ // Create the horizontal navigation HTML
+ const navHTML = `
+
+ `;
+
+ // Insert the navigation at the beginning of the body
+ document.body.insertAdjacentHTML('afterbegin', navHTML);
+
+ // Update navigation links based on current page
+ updateActiveNav();
+});
+
+function updateActiveNav() {
+ const currentPath = window.location.pathname;
+ const navLinks = document.querySelectorAll('.horizontal-nav .nav-links a');
+
+ navLinks.forEach(link => {
+ // Remove any existing active classes
+ link.classList.remove('active');
+
+ // Add active class based on current path
+ if (currentPath === '/' && link.getAttribute('href') === '/') {
+ link.classList.add('active');
+ } else if (currentPath.includes('/references/api_reference/') && link.getAttribute('href').includes('api_reference')) {
+ link.classList.add('active');
+ }
+ });
+}
diff --git a/docs/notebooks/langchain/Llama_Stack_LangChain.ipynb b/docs/notebooks/langchain/Llama_Stack_LangChain.ipynb
new file mode 100644
index 000000000..d44ac6994
--- /dev/null
+++ b/docs/notebooks/langchain/Llama_Stack_LangChain.ipynb
@@ -0,0 +1,701 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "1ztegmwm4sp",
+ "metadata": {},
+ "source": [
+ "## LlamaStack + LangChain Integration Tutorial\n",
+ "\n",
+ "This notebook demonstrates how to integrate **LlamaStack** with **LangChain** to build a complete RAG (Retrieval-Augmented Generation) system.\n",
+ "\n",
+ "### Overview\n",
+ "\n",
+ "- **LlamaStack**: Provides the infrastructure for running LLMs and Open AI Compatible Vector Stores\n",
+ "- **LangChain**: Provides the framework for chaining operations and prompt templates\n",
+ "- **Integration**: Uses LlamaStack's OpenAI-compatible API with LangChain\n",
+ "\n",
+ "### What You'll See\n",
+ "\n",
+ "1. Setting up LlamaStack server with Fireworks AI provider\n",
+ "2. Creating and Querying Vector Stores\n",
+ "3. Building RAG chains with LangChain + LLAMAStack\n",
+ "4. Querying the chain for relevant information\n",
+ "\n",
+ "### Prerequisites\n",
+ "\n",
+ "- Fireworks API key\n",
+ "\n",
+ "---\n",
+ "\n",
+ "### 1. Installation and Setup"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2ktr5ls2cas",
+ "metadata": {},
+ "source": [
+ "#### Install Required Dependencies\n",
+ "\n",
+ "First, we install all the necessary packages for LangChain and FastAPI integration."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "5b6a6a17-b931-4bea-8273-0d6e5563637a",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: uv in /Users/swapna942/miniconda3/lib/python3.12/site-packages (0.7.20)\n",
+ "\u001b[2mUsing Python 3.12.11 environment at: /Users/swapna942/miniconda3\u001b[0m\n",
+ "\u001b[2mAudited \u001b[1m7 packages\u001b[0m \u001b[2min 42ms\u001b[0m\u001b[0m\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install uv\n",
+ "!uv pip install fastapi uvicorn \"langchain>=0.2\" langchain-openai \\\n",
+ " langchain-community langchain-text-splitters \\\n",
+ " faiss-cpu"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "wmt9jvqzh7n",
+ "metadata": {},
+ "source": [
+ "### 2. LlamaStack Server Setup\n",
+ "\n",
+ "#### Build and Start LlamaStack Server\n",
+ "\n",
+ "This section sets up the LlamaStack server with:\n",
+ "- **Fireworks AI** as the inference provider\n",
+ "- **Sentence Transformers** for embeddings\n",
+ "\n",
+ "The server runs on `localhost:8321` and provides OpenAI-compatible endpoints."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "dd2dacf3-ec8b-4cc7-8ff4-b5b6ea4a6e9e",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import subprocess\n",
+ "import time\n",
+ "\n",
+ "# Remove UV_SYSTEM_PYTHON to ensure uv creates a proper virtual environment\n",
+ "# instead of trying to use system Python globally, which could cause permission issues\n",
+ "# and package conflicts with the system's Python installation\n",
+ "if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
+ " del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
+ "\n",
+ "def run_llama_stack_server_background():\n",
+ " \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n",
+ " log_file = open(\"llama_stack_server.log\", \"w\")\n",
+ " process = subprocess.Popen(\n",
+ " \"uv run --with llama-stack llama stack build --distro starter --image-type venv --run\",\n",
+ " shell=True,\n",
+ " stdout=log_file,\n",
+ " stderr=log_file,\n",
+ " text=True,\n",
+ " )\n",
+ "\n",
+ " print(f\"Building and starting Llama Stack server with PID: {process.pid}\")\n",
+ " return process\n",
+ "\n",
+ "\n",
+ "def wait_for_server_to_start():\n",
+ " import requests\n",
+ " from requests.exceptions import ConnectionError\n",
+ "\n",
+ " url = \"http://0.0.0.0:8321/v1/health\"\n",
+ " max_retries = 30\n",
+ " retry_interval = 1\n",
+ "\n",
+ " print(\"Waiting for server to start\", end=\"\")\n",
+ " for _ in range(max_retries):\n",
+ " try:\n",
+ " response = requests.get(url)\n",
+ " if response.status_code == 200:\n",
+ " print(\"\\nServer is ready!\")\n",
+ " return True\n",
+ " except ConnectionError:\n",
+ " print(\".\", end=\"\", flush=True)\n",
+ " time.sleep(retry_interval)\n",
+ "\n",
+ " print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
+ " return False\n",
+ "\n",
+ "\n",
+ "def kill_llama_stack_server():\n",
+ " # Kill any existing llama stack server processes using pkill command\n",
+ " os.system(\"pkill -f llama_stack.core.server.server\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "28bd8dbd-4576-4e76-813f-21ab94db44a2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Building and starting Llama Stack server with PID: 19747\n",
+ "Waiting for server to start....\n",
+ "Server is ready!\n"
+ ]
+ }
+ ],
+ "source": [
+ "server_process = run_llama_stack_server_background()\n",
+ "assert wait_for_server_to_start()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "gr9cdcg4r7n",
+ "metadata": {},
+ "source": [
+ "#### Install LlamaStack Client\n",
+ "\n",
+ "Install the client library to interact with the LlamaStack server."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "487d2dbc-d071-400e-b4f0-dcee58f8dc95",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\u001b[2mUsing Python 3.12.11 environment at: /Users/swapna942/miniconda3\u001b[0m\n",
+ "\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 27ms\u001b[0m\u001b[0m\n"
+ ]
+ }
+ ],
+ "source": [
+ "!uv pip install llama_stack_client"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0j5hag7l9x89",
+ "metadata": {},
+ "source": [
+ "### 3. Initialize LlamaStack Client\n",
+ "\n",
+ "Create a client connection to the LlamaStack server with API keys for different providers:\n",
+ "\n",
+ "- **Fireworks API Key**: For Fireworks models\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "ab4eff97-4565-4c73-b1b3-0020a4c7e2a5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from llama_stack_client import LlamaStackClient\n",
+ "\n",
+ "client = LlamaStackClient(\n",
+ " base_url=\"http://0.0.0.0:8321\",\n",
+ " provider_data={\"fireworks_api_key\": \"***\"},\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "vwhexjy1e8o",
+ "metadata": {},
+ "source": [
+ "#### Explore Available Models and Safety Features\n",
+ "\n",
+ "Check what models and safety shields are available through your LlamaStack instance."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "880443ef-ac3c-48b1-a80a-7dab5b25ac61",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/models \"HTTP/1.1 200 OK\"\n",
+ "INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/shields \"HTTP/1.1 200 OK\"\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Available Fireworks models:\n",
+ "- fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct\n",
+ "- fireworks/accounts/fireworks/models/llama-v3p1-70b-instruct\n",
+ "- fireworks/accounts/fireworks/models/llama-v3p1-405b-instruct\n",
+ "- fireworks/accounts/fireworks/models/llama-v3p2-3b-instruct\n",
+ "- fireworks/accounts/fireworks/models/llama-v3p2-11b-vision-instruct\n",
+ "- fireworks/accounts/fireworks/models/llama-v3p2-90b-vision-instruct\n",
+ "- fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct\n",
+ "- fireworks/accounts/fireworks/models/llama4-scout-instruct-basic\n",
+ "- fireworks/accounts/fireworks/models/llama4-maverick-instruct-basic\n",
+ "- fireworks/nomic-ai/nomic-embed-text-v1.5\n",
+ "- fireworks/accounts/fireworks/models/llama-guard-3-8b\n",
+ "- fireworks/accounts/fireworks/models/llama-guard-3-11b-vision\n",
+ "----\n",
+ "Available shields (safety models):\n",
+ "code-scanner\n",
+ "llama-guard\n",
+ "nemo-guardrail\n",
+ "----\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(\"Available Fireworks models:\")\n",
+ "for m in client.models.list():\n",
+ " if m.identifier.startswith(\"fireworks/\"):\n",
+ " print(f\"- {m.identifier}\")\n",
+ "\n",
+ "print(\"----\")\n",
+ "print(\"Available shields (safety models):\")\n",
+ "for s in client.shields.list():\n",
+ " print(s.identifier)\n",
+ "print(\"----\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "gojp7at31ht",
+ "metadata": {},
+ "source": [
+ "### 4. Vector Store Setup\n",
+ "\n",
+ "#### Create a Vector Store with File Upload\n",
+ "\n",
+ "Create a vector store using the OpenAI-compatible vector stores API:\n",
+ "\n",
+ "- **Vector Store**: OpenAI-compatible vector store for document storage\n",
+ "- **File Upload**: Automatic chunking and embedding of uploaded files \n",
+ "- **Embedding Model**: Sentence Transformers model for text embeddings\n",
+ "- **Dimensions**: 384-dimensional embeddings"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "be2c2899-ea53-4e5f-b6b8-ed425f5d6572",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/files \"HTTP/1.1 200 OK\"\n",
+ "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/files \"HTTP/1.1 200 OK\"\n",
+ "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/files \"HTTP/1.1 200 OK\"\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "File(id='file-54652c95c56c4c34918a97d7ff8a4320', bytes=41, created_at=1757442621, expires_at=1788978621, filename='shipping_policy.txt', object='file', purpose='assistants')\n",
+ "File(id='file-fb1227c1d1854da1bd774d21e5b7e41c', bytes=48, created_at=1757442621, expires_at=1788978621, filename='returns_policy.txt', object='file', purpose='assistants')\n",
+ "File(id='file-673f874852fe42798675a13d06a256e2', bytes=45, created_at=1757442621, expires_at=1788978621, filename='support.txt', object='file', purpose='assistants')\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/vector_stores \"HTTP/1.1 200 OK\"\n"
+ ]
+ }
+ ],
+ "source": [
+ "from io import BytesIO\n",
+ "\n",
+ "docs = [\n",
+ " (\"Acme ships globally in 3-5 business days.\", {\"title\": \"Shipping Policy\"}),\n",
+ " (\"Returns are accepted within 30 days of purchase.\", {\"title\": \"Returns Policy\"}),\n",
+ " (\"Support is available 24/7 via chat and email.\", {\"title\": \"Support\"}),\n",
+ "]\n",
+ "\n",
+ "file_ids = []\n",
+ "for content, metadata in docs:\n",
+ " with BytesIO(content.encode()) as file_buffer:\n",
+ " file_buffer.name = f\"{metadata['title'].replace(' ', '_').lower()}.txt\"\n",
+ " create_file_response = client.files.create(file=file_buffer, purpose=\"assistants\")\n",
+ " print(create_file_response)\n",
+ " file_ids.append(create_file_response.id)\n",
+ "\n",
+ "# Create vector store with files\n",
+ "vector_store = client.vector_stores.create(\n",
+ " name=\"acme_docs\",\n",
+ " file_ids=file_ids,\n",
+ " embedding_model=\"sentence-transformers/all-MiniLM-L6-v2\",\n",
+ " embedding_dimension=384,\n",
+ " provider_id=\"faiss\"\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9061tmi1zpq",
+ "metadata": {},
+ "source": [
+ "#### Test Vector Store Search\n",
+ "\n",
+ "Query the vector store. This performs semantic search to find relevant documents based on the query."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "ba9d1901-bd5e-4216-b3e6-19dc74551cc6",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/vector_stores/vs_708c060b-45da-423e-8354-68529b4fd1a6/search \"HTTP/1.1 200 OK\"\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Acme ships globally in 3-5 business days.\n",
+ "Returns are accepted within 30 days of purchase.\n"
+ ]
+ }
+ ],
+ "source": [
+ "search_response = client.vector_stores.search(\n",
+ " vector_store_id=vector_store.id,\n",
+ " query=\"How long does shipping take?\",\n",
+ " max_num_results=2\n",
+ ")\n",
+ "for result in search_response.data:\n",
+ " content = result.content[0].text\n",
+ " print(content)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "usne6mbspms",
+ "metadata": {},
+ "source": [
+ "### 5. LangChain Integration\n",
+ "\n",
+ "#### Configure LangChain with LlamaStack\n",
+ "\n",
+ "Set up LangChain to use LlamaStack's OpenAI-compatible API:\n",
+ "\n",
+ "- **Base URL**: Points to LlamaStack's OpenAI endpoint\n",
+ "- **Headers**: Include Fireworks API key for model access\n",
+ "- **Model**: Use Meta Llama v3p1 8b instruct model for inference"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "c378bd10-09c2-417c-bdfc-1e0a2dd19084",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "\n",
+ "from langchain_openai import ChatOpenAI\n",
+ "\n",
+ "# Point LangChain to Llamastack Server\n",
+ "llm = ChatOpenAI(\n",
+ " base_url=\"http://0.0.0.0:8321/v1/openai/v1\",\n",
+ " api_key=\"dummy\",\n",
+ " model=\"fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct\",\n",
+ " default_headers={\"X-LlamaStack-Provider-Data\": '{\"fireworks_api_key\": \"***\"}'},\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5a4ddpcuk3l",
+ "metadata": {},
+ "source": [
+ "#### Test LLM Connection\n",
+ "\n",
+ "Verify that LangChain can successfully communicate with the LlamaStack server."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "f88ffb5a-657b-4916-9375-c6ddc156c25e",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "AIMessage(content=\"A llama's gentle eyes shine bright,\\nIn the Andes, it roams through morning light.\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': None, 'model_name': 'fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct', 'system_fingerprint': None, 'id': 'chatcmpl-602b5967-82a3-476b-9cd2-7d3b29b76ee8', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--0933c465-ff4d-4a7b-b7fb-fd97dd8244f3-0')"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Test llm with simple message\n",
+ "messages = [\n",
+ " {\"role\": \"system\", \"content\": \"You are a friendly assistant.\"},\n",
+ " {\"role\": \"user\", \"content\": \"Write a two-sentence poem about llama.\"},\n",
+ "]\n",
+ "llm.invoke(messages)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0xh0jg6a0l4a",
+ "metadata": {},
+ "source": [
+ "### 6. Building the RAG Chain\n",
+ "\n",
+ "#### Create a Complete RAG Pipeline\n",
+ "\n",
+ "Build a LangChain pipeline that combines:\n",
+ "\n",
+ "1. **Vector Search**: Query LlamaStack's Open AI compatible Vector Store\n",
+ "2. **Context Assembly**: Format retrieved documents\n",
+ "3. **Prompt Template**: Structure the input for the LLM\n",
+ "4. **LLM Generation**: Generate answers using context\n",
+ "5. **Output Parsing**: Extract the final response\n",
+ "\n",
+ "**Chain Flow**: `Query → Vector Search → Context + Question → LLM → Response`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "9684427d-dcc7-4544-9af5-8b110d014c42",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# LangChain for prompt template and chaining + LLAMA Stack Client Vector DB and LLM chat completion\n",
+ "from langchain_core.output_parsers import StrOutputParser\n",
+ "from langchain_core.prompts import ChatPromptTemplate\n",
+ "from langchain_core.runnables import RunnableLambda, RunnablePassthrough\n",
+ "\n",
+ "\n",
+ "def join_docs(docs):\n",
+ " return \"\\n\\n\".join([f\"[{d.filename}] {d.content[0].text}\" for d in docs.data])\n",
+ "\n",
+ "PROMPT = ChatPromptTemplate.from_messages(\n",
+ " [\n",
+ " (\"system\", \"You are a helpful assistant. Use the following context to answer.\"),\n",
+ " (\"user\", \"Question: {question}\\n\\nContext:\\n{context}\"),\n",
+ " ]\n",
+ ")\n",
+ "\n",
+ "vector_step = RunnableLambda(\n",
+ " lambda x: client.vector_stores.search(\n",
+ " vector_store_id=vector_store.id,\n",
+ " query=x,\n",
+ " max_num_results=2\n",
+ " )\n",
+ " )\n",
+ "\n",
+ "chain = (\n",
+ " {\"context\": vector_step | RunnableLambda(join_docs), \"question\": RunnablePassthrough()}\n",
+ " | PROMPT\n",
+ " | llm\n",
+ " | StrOutputParser()\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0onu6rhphlra",
+ "metadata": {},
+ "source": [
+ "### 7. Testing the RAG System\n",
+ "\n",
+ "#### Example 1: Shipping Query"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "03322188-9509-446a-a4a8-ce3bb83ec87c",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/vector_stores/vs_708c060b-45da-423e-8354-68529b4fd1a6/search \"HTTP/1.1 200 OK\"\n",
+ "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "❓ How long does shipping take?\n",
+ "💡 Acme ships globally in 3-5 business days. This means that shipping typically takes between 3 to 5 working days from the date of dispatch or order fulfillment.\n"
+ ]
+ }
+ ],
+ "source": [
+ "query = \"How long does shipping take?\"\n",
+ "response = chain.invoke(query)\n",
+ "print(\"❓\", query)\n",
+ "print(\"💡\", response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b7krhqj88ku",
+ "metadata": {},
+ "source": [
+ "#### Example 2: Returns Policy Query"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "61995550-bb0b-46a8-a5d0-023207475d60",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/vector_stores/vs_708c060b-45da-423e-8354-68529b4fd1a6/search \"HTTP/1.1 200 OK\"\n",
+ "INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "❓ Can I return a product after 40 days?\n",
+ "💡 Based on the provided context, you cannot return a product after 40 days. The return window is limited to 30 days from the date of purchase.\n"
+ ]
+ }
+ ],
+ "source": [
+ "query = \"Can I return a product after 40 days?\"\n",
+ "response = chain.invoke(query)\n",
+ "print(\"❓\", query)\n",
+ "print(\"💡\", response)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "h4w24fadvjs",
+ "metadata": {},
+ "source": [
+ "---\n",
+ "We have successfully built a RAG system that combines:\n",
+ "\n",
+ "- **LlamaStack** for infrastructure (LLM serving + Vector Store)\n",
+ "- **LangChain** for orchestration (prompts + chains)\n",
+ "- **Fireworks** for high-quality language models\n",
+ "\n",
+ "### Key Benefits\n",
+ "\n",
+ "1. **Unified Infrastructure**: Single server for LLMs and Vector Store\n",
+ "2. **OpenAI Compatibility**: Easy integration with existing LangChain code\n",
+ "3. **Multi-Provider Support**: Switch between different LLM providers\n",
+ "4. **Production Ready**: Built-in safety shields and monitoring\n",
+ "\n",
+ "### Next Steps\n",
+ "\n",
+ "- Add more sophisticated document processing\n",
+ "- Implement conversation memory\n",
+ "- Add safety filtering and monitoring\n",
+ "- Scale to larger document collections\n",
+ "- Integrate with web frameworks like FastAPI or Streamlit\n",
+ "\n",
+ "---\n",
+ "\n",
+ "##### 🔧 Cleanup\n",
+ "\n",
+ "Don't forget to stop the LlamaStack server when you're done:\n",
+ "\n",
+ "```python\n",
+ "kill_llama_stack_server()\n",
+ "```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "15647c46-22ce-4698-af3f-8161329d8e3a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "kill_llama_stack_server()"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/source/building_applications/rag.md b/docs/source/building_applications/rag.md
index 289c38991..802859e87 100644
--- a/docs/source/building_applications/rag.md
+++ b/docs/source/building_applications/rag.md
@@ -93,10 +93,31 @@ chunks_response = client.vector_io.query(
### Using the RAG Tool
+> **⚠️ DEPRECATION NOTICE**: The RAG Tool is being deprecated in favor of directly using the OpenAI-compatible Search
+> API. We recommend migrating to the OpenAI APIs for better compatibility and future support.
+
A better way to ingest documents is to use the RAG Tool. This tool allows you to ingest documents from URLs, files, etc.
and automatically chunks them into smaller pieces. More examples for how to format a RAGDocument can be found in the
[appendix](#more-ragdocument-examples).
+#### OpenAI API Integration & Migration
+
+The RAG tool has been updated to use OpenAI-compatible APIs. This provides several benefits:
+
+- **Files API Integration**: Documents are now uploaded using OpenAI's file upload endpoints
+- **Vector Stores API**: Vector storage operations use OpenAI's vector store format with configurable chunking strategies
+- **Error Resilience**: When processing multiple documents, individual failures are logged but don't crash the operation. Failed documents are skipped while successful ones continue processing.
+
+**Migration Path:**
+We recommend migrating to the OpenAI-compatible Search API for:
+1. **Better OpenAI Ecosystem Integration**: Direct compatibility with OpenAI tools and workflows including the Responses API
+2. **Future-Proof**: Continued support and feature development
+3. **Full OpenAI Compatibility**: Vector Stores, Files, and Search APIs are fully compatible with OpenAI's Responses API
+
+The OpenAI APIs are used under the hood, so you can continue to use your existing RAG Tool code with minimal changes.
+However, we recommend updating your code to use the new OpenAI-compatible APIs for better long-term support. If any
+documents fail to process, they will be logged in the response but will not cause the entire operation to fail.
+
```python
from llama_stack_client import RAGDocument
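For migration reference, a condensed sketch of the OpenAI-compatible path recommended above: upload via the Files API, index via Vector Stores, then search. It mirrors the flow the LangChain notebook added in this change; the base URL, embedding model, and dimension are illustrative:

```python
# Condensed migration sketch: ingest via the Files API, index via the Vector
# Stores API, then search -- instead of the deprecated RAG Tool. Values such
# as the base URL and embedding model are illustrative.
from io import BytesIO

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

buf = BytesIO(b"Acme ships globally in 3-5 business days.")
buf.name = "shipping_policy.txt"
uploaded = client.files.create(file=buf, purpose="assistants")

store = client.vector_stores.create(
    name="acme_docs",
    file_ids=[uploaded.id],
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",
    embedding_dimension=384,
)

results = client.vector_stores.search(
    vector_store_id=store.id, query="How long does shipping take?", max_num_results=2
)
for r in results.data:
    print(r.content[0].text)
```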
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 3f84d1310..0cbddef31 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -131,6 +131,7 @@ html_static_path = ["../_static"]
def setup(app):
app.add_css_file("css/my_theme.css")
app.add_js_file("js/detect_theme.js")
+ app.add_js_file("js/horizontal_nav.js")
app.add_js_file("js/keyboard_shortcuts.js")
def dockerhub_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
diff --git a/docs/source/contributing/index.md b/docs/source/contributing/index.md
index 1846f4d97..71c3bd5a6 100644
--- a/docs/source/contributing/index.md
+++ b/docs/source/contributing/index.md
@@ -35,5 +35,5 @@ testing/record-replay
### Benchmarking
-```{include} ../../../docs/source/distributions/k8s-benchmark/README.md
+```{include} ../../../benchmarking/k8s-benchmark/README.md
```
diff --git a/docs/source/providers/inference/index.md b/docs/source/providers/inference/index.md
index b6d215474..c5720daef 100644
--- a/docs/source/providers/inference/index.md
+++ b/docs/source/providers/inference/index.md
@@ -18,6 +18,7 @@ This section contains documentation for all available providers for the **infere
inline_meta-reference
inline_sentence-transformers
remote_anthropic
+remote_azure
remote_bedrock
remote_cerebras
remote_databricks
diff --git a/docs/source/providers/inference/remote_azure.md b/docs/source/providers/inference/remote_azure.md
new file mode 100644
index 000000000..19f8f418b
--- /dev/null
+++ b/docs/source/providers/inference/remote_azure.md
@@ -0,0 +1,29 @@
+# remote::azure
+
+## Description
+
+
+Azure OpenAI inference provider for accessing GPT models and other Azure services.
+Provider documentation: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
+
+
+## Configuration
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `api_key` | `` | No | | Azure API key for Azure |
+| `api_base` | `` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) |
+| `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) |
+| `api_type` | `str \| None` | No | azure | Azure API type for Azure (e.g., azure) |
+
+## Sample Configuration
+
+```yaml
+api_key: ${env.AZURE_API_KEY:=}
+api_base: ${env.AZURE_API_BASE:=}
+api_version: ${env.AZURE_API_VERSION:=}
+api_type: ${env.AZURE_API_TYPE:=}
+
+```
+
diff --git a/llama_stack/cli/verify_download.py b/llama_stack/cli/verify_download.py
index b7f4cfdb5..e738abb4f 100644
--- a/llama_stack/cli/verify_download.py
+++ b/llama_stack/cli/verify_download.py
@@ -48,15 +48,12 @@ def setup_verify_download_parser(parser: argparse.ArgumentParser) -> None:
parser.set_defaults(func=partial(run_verify_cmd, parser=parser))
-def calculate_md5(filepath: Path, chunk_size: int = 8192) -> str:
- # NOTE: MD5 is used here only for download integrity verification,
- # not for security purposes
- # TODO: switch to SHA256
- md5_hash = hashlib.md5(usedforsecurity=False)
+def calculate_sha256(filepath: Path, chunk_size: int = 8192) -> str:
+ sha256_hash = hashlib.sha256()
with open(filepath, "rb") as f:
for chunk in iter(lambda: f.read(chunk_size), b""):
- md5_hash.update(chunk)
- return md5_hash.hexdigest()
+ sha256_hash.update(chunk)
+ return sha256_hash.hexdigest()
def load_checksums(checklist_path: Path) -> dict[str, str]:
@@ -64,10 +61,10 @@ def load_checksums(checklist_path: Path) -> dict[str, str]:
with open(checklist_path) as f:
for line in f:
if line.strip():
- md5sum, filepath = line.strip().split(" ", 1)
+ sha256sum, filepath = line.strip().split(" ", 1)
# Remove leading './' if present
filepath = filepath.lstrip("./")
- checksums[filepath] = md5sum
+ checksums[filepath] = sha256sum
return checksums
@@ -88,7 +85,7 @@ def verify_files(model_dir: Path, checksums: dict[str, str], console: Console) -
matches = False
if exists:
- actual_hash = calculate_md5(full_path)
+ actual_hash = calculate_sha256(full_path)
matches = actual_hash == expected_hash
results.append(
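The checklist format `load_checksums` parses is one `<sha256> <relative-path>` pair per line, i.e. `sha256sum` output with an optional leading `./`. A sketch of generating a compatible checklist for a model directory; the `checklist.chk` filename and directory path are assumptions for illustration:

```python
# Sketch: write a checklist in the format load_checksums() parses --
# "<sha256> ./relative/path" per line, matching `sha256sum` output. The
# "checklist.chk" filename and the model directory are assumed here.
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 8192) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

model_dir = Path("~/.llama/checkpoints/my-model").expanduser()
with open(model_dir / "checklist.chk", "w") as out:
    for p in sorted(model_dir.rglob("*")):
        if p.is_file() and p.name != "checklist.chk":
            out.write(f"{sha256_of(p)} ./{p.relative_to(model_dir)}\n")
```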
diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py
index 0f348b067..faaeefd01 100644
--- a/llama_stack/core/datatypes.py
+++ b/llama_stack/core/datatypes.py
@@ -431,6 +431,12 @@ class ServerConfig(BaseModel):
)
+class InferenceStoreConfig(BaseModel):
+ sql_store_config: SqlStoreConfig
+ max_write_queue_size: int = Field(default=10000, description="Max queued writes for inference store")
+ num_writers: int = Field(default=4, description="Number of concurrent background writers")
+
+
class StackRunConfig(BaseModel):
version: int = LLAMA_STACK_RUN_CONFIG_VERSION
@@ -464,11 +470,12 @@ Configuration for the persistence store used by the distribution registry. If no
a default SQLite store will be used.""",
)
- inference_store: SqlStoreConfig | None = Field(
+ inference_store: InferenceStoreConfig | SqlStoreConfig | None = Field(
default=None,
description="""
-Configuration for the persistence store used by the inference API. If not specified,
-a default SQLite store will be used.""",
+Configuration for the persistence store used by the inference API. Can be either a
+InferenceStoreConfig (with queue tuning parameters) or a SqlStoreConfig (deprecated).
+If not specified, a default SQLite store will be used.""",
)
# registry of "resources" in the distribution
diff --git a/llama_stack/core/routers/__init__.py b/llama_stack/core/routers/__init__.py
index 1faace34a..f129f8ede 100644
--- a/llama_stack/core/routers/__init__.py
+++ b/llama_stack/core/routers/__init__.py
@@ -78,7 +78,10 @@ async def get_auto_router_impl(
# TODO: move pass configs to routers instead
if api == Api.inference and run_config.inference_store:
- inference_store = InferenceStore(run_config.inference_store, policy)
+ inference_store = InferenceStore(
+ config=run_config.inference_store,
+ policy=policy,
+ )
await inference_store.initialize()
api_to_dep_impl["store"] = inference_store
diff --git a/llama_stack/core/routers/inference.py b/llama_stack/core/routers/inference.py
index 045093fe0..762d7073e 100644
--- a/llama_stack/core/routers/inference.py
+++ b/llama_stack/core/routers/inference.py
@@ -63,7 +63,7 @@ from llama_stack.models.llama.llama3.chat_format import ChatFormat
from llama_stack.models.llama.llama3.tokenizer import Tokenizer
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
from llama_stack.providers.utils.inference.inference_store import InferenceStore
-from llama_stack.providers.utils.telemetry.tracing import get_current_span
+from llama_stack.providers.utils.telemetry.tracing import enqueue_event, get_current_span
logger = get_logger(name=__name__, category="core::routers")
@@ -90,6 +90,11 @@ class InferenceRouter(Inference):
async def shutdown(self) -> None:
logger.debug("InferenceRouter.shutdown")
+ if self.store:
+ try:
+ await self.store.shutdown()
+ except Exception as e:
+ logger.warning(f"Error during InferenceStore shutdown: {e}")
async def register_model(
self,
@@ -160,7 +165,7 @@ class InferenceRouter(Inference):
metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model)
if self.telemetry:
for metric in metrics:
- await self.telemetry.log_event(metric)
+ enqueue_event(metric)
return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics]
async def _count_tokens(
@@ -431,7 +436,7 @@ class InferenceRouter(Inference):
model=model_obj,
)
for metric in metrics:
- await self.telemetry.log_event(metric)
+ enqueue_event(metric)
# these metrics will show up in the client response.
response.metrics = (
@@ -537,7 +542,7 @@ class InferenceRouter(Inference):
model=model_obj,
)
for metric in metrics:
- await self.telemetry.log_event(metric)
+ enqueue_event(metric)
# these metrics will show up in the client response.
response.metrics = (
metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics
@@ -664,7 +669,7 @@ class InferenceRouter(Inference):
"completion_tokens",
"total_tokens",
]: # Only log completion and total tokens
- await self.telemetry.log_event(metric)
+ enqueue_event(metric)
# Return metrics in response
async_metrics = [
@@ -710,7 +715,7 @@ class InferenceRouter(Inference):
)
for metric in completion_metrics:
if metric.metric in ["completion_tokens", "total_tokens"]: # Only log completion and total tokens
- await self.telemetry.log_event(metric)
+ enqueue_event(metric)
# Return metrics in response
return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in completion_metrics]
@@ -806,7 +811,7 @@ class InferenceRouter(Inference):
model=model,
)
for metric in metrics:
- await self.telemetry.log_event(metric)
+ enqueue_event(metric)
yield chunk
finally:
diff --git a/llama_stack/distributions/ci-tests/build.yaml b/llama_stack/distributions/ci-tests/build.yaml
index 8e6c0bf67..a4d920cd6 100644
--- a/llama_stack/distributions/ci-tests/build.yaml
+++ b/llama_stack/distributions/ci-tests/build.yaml
@@ -17,6 +17,7 @@ distribution_spec:
- provider_type: remote::vertexai
- provider_type: remote::groq
- provider_type: remote::sambanova
+ - provider_type: remote::azure
- provider_type: inline::sentence-transformers
vector_io:
- provider_type: inline::faiss
diff --git a/llama_stack/distributions/ci-tests/run.yaml b/llama_stack/distributions/ci-tests/run.yaml
index 26a677c7a..a478a3872 100644
--- a/llama_stack/distributions/ci-tests/run.yaml
+++ b/llama_stack/distributions/ci-tests/run.yaml
@@ -81,6 +81,13 @@ providers:
config:
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=}
+ - provider_id: ${env.AZURE_API_KEY:+azure}
+ provider_type: remote::azure
+ config:
+ api_key: ${env.AZURE_API_KEY:=}
+ api_base: ${env.AZURE_API_BASE:=}
+ api_version: ${env.AZURE_API_VERSION:=}
+ api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
diff --git a/llama_stack/distributions/starter-gpu/build.yaml b/llama_stack/distributions/starter-gpu/build.yaml
index ff7c58e6f..05a2bf180 100644
--- a/llama_stack/distributions/starter-gpu/build.yaml
+++ b/llama_stack/distributions/starter-gpu/build.yaml
@@ -18,6 +18,7 @@ distribution_spec:
- provider_type: remote::vertexai
- provider_type: remote::groq
- provider_type: remote::sambanova
+ - provider_type: remote::azure
- provider_type: inline::sentence-transformers
vector_io:
- provider_type: inline::faiss
diff --git a/llama_stack/distributions/starter-gpu/run.yaml b/llama_stack/distributions/starter-gpu/run.yaml
index 5d9dfcb27..786506706 100644
--- a/llama_stack/distributions/starter-gpu/run.yaml
+++ b/llama_stack/distributions/starter-gpu/run.yaml
@@ -81,6 +81,13 @@ providers:
config:
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=}
+ - provider_id: ${env.AZURE_API_KEY:+azure}
+ provider_type: remote::azure
+ config:
+ api_key: ${env.AZURE_API_KEY:=}
+ api_base: ${env.AZURE_API_BASE:=}
+ api_version: ${env.AZURE_API_VERSION:=}
+ api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
diff --git a/llama_stack/distributions/starter/build.yaml b/llama_stack/distributions/starter/build.yaml
index e84e528da..2f0cd24fd 100644
--- a/llama_stack/distributions/starter/build.yaml
+++ b/llama_stack/distributions/starter/build.yaml
@@ -18,6 +18,7 @@ distribution_spec:
- provider_type: remote::vertexai
- provider_type: remote::groq
- provider_type: remote::sambanova
+ - provider_type: remote::azure
- provider_type: inline::sentence-transformers
vector_io:
- provider_type: inline::faiss
diff --git a/llama_stack/distributions/starter/run.yaml b/llama_stack/distributions/starter/run.yaml
index a3962b8aa..2814b2ced 100644
--- a/llama_stack/distributions/starter/run.yaml
+++ b/llama_stack/distributions/starter/run.yaml
@@ -81,6 +81,13 @@ providers:
config:
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=}
+ - provider_id: ${env.AZURE_API_KEY:+azure}
+ provider_type: remote::azure
+ config:
+ api_key: ${env.AZURE_API_KEY:=}
+ api_base: ${env.AZURE_API_BASE:=}
+ api_version: ${env.AZURE_API_VERSION:=}
+ api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
diff --git a/llama_stack/distributions/starter/starter.py b/llama_stack/distributions/starter/starter.py
index 2fca52700..c2dfe95ad 100644
--- a/llama_stack/distributions/starter/starter.py
+++ b/llama_stack/distributions/starter/starter.py
@@ -59,6 +59,7 @@ ENABLED_INFERENCE_PROVIDERS = [
"cerebras",
"nvidia",
"bedrock",
+ "azure",
]
INFERENCE_PROVIDER_IDS = {
@@ -68,6 +69,7 @@ INFERENCE_PROVIDER_IDS = {
"cerebras": "${env.CEREBRAS_API_KEY:+cerebras}",
"nvidia": "${env.NVIDIA_API_KEY:+nvidia}",
"vertexai": "${env.VERTEX_AI_PROJECT:+vertexai}",
+ "azure": "${env.AZURE_API_KEY:+azure}",
}
@@ -277,5 +279,21 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
"http://localhost:11434",
"Ollama URL",
),
+ "AZURE_API_KEY": (
+ "",
+ "Azure API Key",
+ ),
+ "AZURE_API_BASE": (
+ "",
+ "Azure API Base",
+ ),
+ "AZURE_API_VERSION": (
+ "",
+ "Azure API Version",
+ ),
+ "AZURE_API_TYPE": (
+ "azure",
+ "Azure API Type",
+ ),
},
)
diff --git a/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py b/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
index be18430e4..9bc22f979 100644
--- a/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
+++ b/llama_stack/providers/inline/tool_runtime/rag/context_retriever.py
@@ -8,7 +8,7 @@
from jinja2 import Template
from llama_stack.apis.common.content_types import InterleavedContent
-from llama_stack.apis.inference import UserMessage
+from llama_stack.apis.inference import OpenAIUserMessageParam
from llama_stack.apis.tools.rag_tool import (
DefaultRAGQueryGeneratorConfig,
LLMRAGQueryGeneratorConfig,
@@ -61,16 +61,16 @@ async def llm_rag_query_generator(
messages = [interleaved_content_as_str(content)]
template = Template(config.template)
- content = template.render({"messages": messages})
+ rendered_content: str = template.render({"messages": messages})
model = config.model
- message = UserMessage(content=content)
- response = await inference_api.chat_completion(
- model_id=model,
+ message = OpenAIUserMessageParam(content=rendered_content)
+ response = await inference_api.openai_chat_completion(
+ model=model,
messages=[message],
stream=False,
)
- query = response.completion_message.content
+ query = response.choices[0].message.content
return query
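A hedged sketch of the new call shape in context_retriever.py: the OpenAI-compatible endpoint takes model (not model_id) and returns choices rather than a completion_message. The inference_api handle and model name are placeholders.

from llama_stack.apis.inference import OpenAIUserMessageParam


async def generate_query(inference_api, model: str, prompt: str) -> str:
    message = OpenAIUserMessageParam(content=prompt)
    response = await inference_api.openai_chat_completion(
        model=model,
        messages=[message],
        stream=False,
    )
    # OpenAI-style responses carry the text on choices[0].message.content,
    # not on completion_message.content as the legacy API did.
    return response.choices[0].message.content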
diff --git a/llama_stack/providers/inline/tool_runtime/rag/memory.py b/llama_stack/providers/inline/tool_runtime/rag/memory.py
index aa629cca8..bc68f198d 100644
--- a/llama_stack/providers/inline/tool_runtime/rag/memory.py
+++ b/llama_stack/providers/inline/tool_runtime/rag/memory.py
@@ -45,10 +45,7 @@ from llama_stack.apis.vector_io import (
from llama_stack.log import get_logger
from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
-from llama_stack.providers.utils.memory.vector_store import (
- content_from_doc,
- parse_data_url,
-)
+from llama_stack.providers.utils.memory.vector_store import parse_data_url
from .config import RagToolRuntimeConfig
from .context_retriever import generate_rag_query
@@ -60,6 +57,47 @@ def make_random_string(length: int = 8):
return "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(length))
+async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
+ """Get raw binary data and mime type from a RAGDocument for file upload."""
+ if isinstance(doc.content, URL):
+ if doc.content.uri.startswith("data:"):
+ parts = parse_data_url(doc.content.uri)
+ mime_type = parts["mimetype"]
+ data = parts["data"]
+
+ if parts["is_base64"]:
+ file_data = base64.b64decode(data)
+ else:
+ file_data = data.encode("utf-8")
+
+ return file_data, mime_type
+ else:
+ async with httpx.AsyncClient() as client:
+ r = await client.get(doc.content.uri)
+ r.raise_for_status()
+ mime_type = r.headers.get("content-type", "application/octet-stream")
+ return r.content, mime_type
+ else:
+ if isinstance(doc.content, str):
+ content_str = doc.content
+ else:
+ content_str = interleaved_content_as_str(doc.content)
+
+ if content_str.startswith("data:"):
+ parts = parse_data_url(content_str)
+ mime_type = parts["mimetype"]
+ data = parts["data"]
+
+ if parts["is_base64"]:
+ file_data = base64.b64decode(data)
+ else:
+ file_data = data.encode("utf-8")
+
+ return file_data, mime_type
+ else:
+ return content_str.encode("utf-8"), "text/plain"
+
+
class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
def __init__(
self,
@@ -95,46 +133,52 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
return
for doc in documents:
- if isinstance(doc.content, URL):
- if doc.content.uri.startswith("data:"):
- parts = parse_data_url(doc.content.uri)
- file_data = base64.b64decode(parts["data"]) if parts["is_base64"] else parts["data"].encode()
- mime_type = parts["mimetype"]
- else:
- async with httpx.AsyncClient() as client:
- response = await client.get(doc.content.uri)
- file_data = response.content
- mime_type = doc.mime_type or response.headers.get("content-type", "application/octet-stream")
- else:
- content_str = await content_from_doc(doc)
- file_data = content_str.encode("utf-8")
- mime_type = doc.mime_type or "text/plain"
+ try:
+ try:
+ file_data, mime_type = await raw_data_from_doc(doc)
+ except Exception as e:
+ log.error(f"Failed to extract content from document {doc.document_id}: {e}")
+ continue
- file_extension = mimetypes.guess_extension(mime_type) or ".txt"
- filename = doc.metadata.get("filename", f"{doc.document_id}{file_extension}")
+ file_extension = mimetypes.guess_extension(mime_type) or ".txt"
+ filename = doc.metadata.get("filename", f"{doc.document_id}{file_extension}")
- file_obj = io.BytesIO(file_data)
- file_obj.name = filename
+ file_obj = io.BytesIO(file_data)
+ file_obj.name = filename
- upload_file = UploadFile(file=file_obj, filename=filename)
+ upload_file = UploadFile(file=file_obj, filename=filename)
- created_file = await self.files_api.openai_upload_file(
- file=upload_file, purpose=OpenAIFilePurpose.ASSISTANTS
- )
+ try:
+ created_file = await self.files_api.openai_upload_file(
+ file=upload_file, purpose=OpenAIFilePurpose.ASSISTANTS
+ )
+ except Exception as e:
+ log.error(f"Failed to upload file for document {doc.document_id}: {e}")
+ continue
- chunking_strategy = VectorStoreChunkingStrategyStatic(
- static=VectorStoreChunkingStrategyStaticConfig(
- max_chunk_size_tokens=chunk_size_in_tokens,
- chunk_overlap_tokens=chunk_size_in_tokens // 4,
+ chunking_strategy = VectorStoreChunkingStrategyStatic(
+ static=VectorStoreChunkingStrategyStaticConfig(
+ max_chunk_size_tokens=chunk_size_in_tokens,
+ chunk_overlap_tokens=chunk_size_in_tokens // 4,
+ )
)
- )
- await self.vector_io_api.openai_attach_file_to_vector_store(
- vector_store_id=vector_db_id,
- file_id=created_file.id,
- attributes=doc.metadata,
- chunking_strategy=chunking_strategy,
- )
+ try:
+ await self.vector_io_api.openai_attach_file_to_vector_store(
+ vector_store_id=vector_db_id,
+ file_id=created_file.id,
+ attributes=doc.metadata,
+ chunking_strategy=chunking_strategy,
+ )
+ except Exception as e:
+ log.error(
+ f"Failed to attach file {created_file.id} to vector store {vector_db_id} for document {doc.document_id}: {e}"
+ )
+ continue
+
+ except Exception as e:
+ log.error(f"Unexpected error processing document {doc.document_id}: {e}")
+ continue
async def query(
self,
@@ -274,7 +318,6 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
if query_config:
query_config = TypeAdapter(RAGQueryConfig).validate_python(query_config)
else:
- # handle someone passing an empty dict
query_config = RAGQueryConfig()
query = kwargs["query"]
@@ -285,6 +328,6 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
)
return ToolInvocationResult(
- content=result.content,
+ content=result.content or [],
metadata=result.metadata,
)
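Usage sketch for the new raw_data_from_doc helper. Document ids and payloads are invented, and the RAGDocument import path and field requirements are assumptions (neither is shown in this hunk):

import asyncio

from llama_stack.apis.common.content_types import URL
from llama_stack.apis.tools import RAGDocument  # assumed import path
from llama_stack.providers.inline.tool_runtime.rag.memory import raw_data_from_doc


async def main() -> None:
    # Plain string content falls through to the text/plain branch.
    doc = RAGDocument(document_id="doc-1", content="hello world")
    data, mime_type = await raw_data_from_doc(doc)
    assert (data, mime_type) == (b"hello world", "text/plain")

    # data: URLs are decoded (base64 or urlencoded) with their declared mime type.
    url_doc = RAGDocument(
        document_id="doc-2",
        content=URL(uri="data:text/plain;base64,aGVsbG8="),
    )
    data, mime_type = await raw_data_from_doc(url_doc)
    assert data == b"hello" and mime_type == "text/plain"


asyncio.run(main())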
diff --git a/llama_stack/providers/registry/batches.py b/llama_stack/providers/registry/batches.py
index de7886efb..a07942486 100644
--- a/llama_stack/providers/registry/batches.py
+++ b/llama_stack/providers/registry/batches.py
@@ -13,7 +13,7 @@ def available_providers() -> list[ProviderSpec]:
InlineProviderSpec(
api=Api.batches,
provider_type="inline::reference",
- pip_packages=["openai"],
+ pip_packages=[],
module="llama_stack.providers.inline.batches.reference",
config_class="llama_stack.providers.inline.batches.reference.config.ReferenceBatchesImplConfig",
api_dependencies=[
diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py
index 541fbb432..64196152b 100644
--- a/llama_stack/providers/registry/inference.py
+++ b/llama_stack/providers/registry/inference.py
@@ -75,7 +75,7 @@ def available_providers() -> list[ProviderSpec]:
api=Api.inference,
adapter=AdapterSpec(
adapter_type="vllm",
- pip_packages=["openai"],
+ pip_packages=[],
module="llama_stack.providers.remote.inference.vllm",
config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig",
description="Remote vLLM inference provider for connecting to vLLM servers.",
@@ -151,9 +151,7 @@ def available_providers() -> list[ProviderSpec]:
api=Api.inference,
adapter=AdapterSpec(
adapter_type="databricks",
- pip_packages=[
- "openai",
- ],
+ pip_packages=[],
module="llama_stack.providers.remote.inference.databricks",
config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
description="Databricks inference provider for running models on Databricks' unified analytics platform.",
@@ -163,9 +161,7 @@ def available_providers() -> list[ProviderSpec]:
api=Api.inference,
adapter=AdapterSpec(
adapter_type="nvidia",
- pip_packages=[
- "openai",
- ],
+ pip_packages=[],
module="llama_stack.providers.remote.inference.nvidia",
config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig",
description="NVIDIA inference provider for accessing NVIDIA NIM models and AI services.",
@@ -175,7 +171,7 @@ def available_providers() -> list[ProviderSpec]:
api=Api.inference,
adapter=AdapterSpec(
adapter_type="runpod",
- pip_packages=["openai"],
+ pip_packages=[],
module="llama_stack.providers.remote.inference.runpod",
config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig",
description="RunPod inference provider for running models on RunPod's cloud GPU platform.",
@@ -207,7 +203,7 @@ def available_providers() -> list[ProviderSpec]:
api=Api.inference,
adapter=AdapterSpec(
adapter_type="gemini",
- pip_packages=["litellm", "openai"],
+ pip_packages=["litellm"],
module="llama_stack.providers.remote.inference.gemini",
config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig",
provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator",
@@ -218,7 +214,7 @@ def available_providers() -> list[ProviderSpec]:
api=Api.inference,
adapter=AdapterSpec(
adapter_type="vertexai",
- pip_packages=["litellm", "google-cloud-aiplatform", "openai"],
+ pip_packages=["litellm", "google-cloud-aiplatform"],
module="llama_stack.providers.remote.inference.vertexai",
config_class="llama_stack.providers.remote.inference.vertexai.VertexAIConfig",
provider_data_validator="llama_stack.providers.remote.inference.vertexai.config.VertexAIProviderDataValidator",
@@ -248,7 +244,7 @@ Available Models:
api=Api.inference,
adapter=AdapterSpec(
adapter_type="groq",
- pip_packages=["litellm", "openai"],
+ pip_packages=["litellm"],
module="llama_stack.providers.remote.inference.groq",
config_class="llama_stack.providers.remote.inference.groq.GroqConfig",
provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator",
@@ -270,7 +266,7 @@ Available Models:
api=Api.inference,
adapter=AdapterSpec(
adapter_type="sambanova",
- pip_packages=["litellm", "openai"],
+ pip_packages=["litellm"],
module="llama_stack.providers.remote.inference.sambanova",
config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig",
provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator",
@@ -299,4 +295,19 @@ Available Models:
description="IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.",
),
),
+ remote_provider_spec(
+ api=Api.inference,
+ adapter=AdapterSpec(
+ adapter_type="azure",
+ pip_packages=["litellm"],
+ module="llama_stack.providers.remote.inference.azure",
+ config_class="llama_stack.providers.remote.inference.azure.AzureConfig",
+ provider_data_validator="llama_stack.providers.remote.inference.azure.config.AzureProviderDataValidator",
+ description="""
+Azure OpenAI inference provider for accessing GPT models and other Azure services.
+Provider documentation: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
+""",
+ ),
+ ),
]
diff --git a/llama_stack/providers/registry/scoring.py b/llama_stack/providers/registry/scoring.py
index 79293d888..a4ec54ed2 100644
--- a/llama_stack/providers/registry/scoring.py
+++ b/llama_stack/providers/registry/scoring.py
@@ -38,7 +38,7 @@ def available_providers() -> list[ProviderSpec]:
InlineProviderSpec(
api=Api.scoring,
provider_type="inline::braintrust",
- pip_packages=["autoevals", "openai"],
+ pip_packages=["autoevals"],
module="llama_stack.providers.inline.scoring.braintrust",
config_class="llama_stack.providers.inline.scoring.braintrust.BraintrustScoringConfig",
api_dependencies=[
diff --git a/llama_stack/providers/remote/inference/azure/__init__.py b/llama_stack/providers/remote/inference/azure/__init__.py
new file mode 100644
index 000000000..87bcaf309
--- /dev/null
+++ b/llama_stack/providers/remote/inference/azure/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .config import AzureConfig
+
+
+async def get_adapter_impl(config: AzureConfig, _deps):
+ from .azure import AzureInferenceAdapter
+
+ impl = AzureInferenceAdapter(config)
+ await impl.initialize()
+ return impl
diff --git a/llama_stack/providers/remote/inference/azure/azure.py b/llama_stack/providers/remote/inference/azure/azure.py
new file mode 100644
index 000000000..449bbbb1c
--- /dev/null
+++ b/llama_stack/providers/remote/inference/azure/azure.py
@@ -0,0 +1,64 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any
+from urllib.parse import urljoin
+
+from llama_stack.apis.inference import ChatCompletionRequest
+from llama_stack.providers.utils.inference.litellm_openai_mixin import (
+ LiteLLMOpenAIMixin,
+)
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
+
+from .config import AzureConfig
+from .models import MODEL_ENTRIES
+
+
+class AzureInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
+ def __init__(self, config: AzureConfig) -> None:
+ LiteLLMOpenAIMixin.__init__(
+ self,
+ MODEL_ENTRIES,
+ litellm_provider_name="azure",
+ api_key_from_config=config.api_key.get_secret_value(),
+ provider_data_api_key_field="azure_api_key",
+ openai_compat_api_base=str(config.api_base),
+ )
+ self.config = config
+
+    # Delegate get_api_key to LiteLLMOpenAIMixin, which resolves per-request provider data
+ get_api_key = LiteLLMOpenAIMixin.get_api_key
+
+ def get_base_url(self) -> str:
+ """
+        Get the Azure OpenAI-compatible base URL.
+
+        Appends /openai/v1 to the configured Azure API base URL.
+ """
+ return urljoin(str(self.config.api_base), "/openai/v1")
+
+ async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
+ # Get base parameters from parent
+ params = await super()._get_params(request)
+
+ # Add Azure specific parameters
+ provider_data = self.get_request_provider_data()
+ if provider_data:
+ if getattr(provider_data, "azure_api_key", None):
+ params["api_key"] = provider_data.azure_api_key
+ if getattr(provider_data, "azure_api_base", None):
+ params["api_base"] = provider_data.azure_api_base
+ if getattr(provider_data, "azure_api_version", None):
+ params["api_version"] = provider_data.azure_api_version
+ if getattr(provider_data, "azure_api_type", None):
+ params["api_type"] = provider_data.azure_api_type
+ else:
+ params["api_key"] = self.config.api_key.get_secret_value()
+ params["api_base"] = str(self.config.api_base)
+ params["api_version"] = self.config.api_version
+ params["api_type"] = self.config.api_type
+
+ return params
diff --git a/llama_stack/providers/remote/inference/azure/config.py b/llama_stack/providers/remote/inference/azure/config.py
new file mode 100644
index 000000000..fe9d61d53
--- /dev/null
+++ b/llama_stack/providers/remote/inference/azure/config.py
@@ -0,0 +1,63 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import os
+from typing import Any
+
+from pydantic import BaseModel, Field, HttpUrl, SecretStr
+
+from llama_stack.schema_utils import json_schema_type
+
+
+class AzureProviderDataValidator(BaseModel):
+    azure_api_key: SecretStr = Field(
+        description="API key for the Azure OpenAI service",
+    )
+    azure_api_base: HttpUrl = Field(
+        description="Base URL of the Azure OpenAI resource (e.g., https://your-resource-name.openai.azure.com)",
+    )
+    azure_api_version: str | None = Field(
+        default=None,
+        description="API version for the Azure OpenAI service (e.g., 2024-06-01)",
+    )
+    azure_api_type: str | None = Field(
+        default="azure",
+        description="API type for the Azure OpenAI service (e.g., azure)",
+    )
+
+
+@json_schema_type
+class AzureConfig(BaseModel):
+    api_key: SecretStr = Field(
+        description="API key for the Azure OpenAI service",
+    )
+    api_base: HttpUrl = Field(
+        description="Base URL of the Azure OpenAI resource (e.g., https://your-resource-name.openai.azure.com)",
+    )
+    api_version: str | None = Field(
+        default_factory=lambda: os.getenv("AZURE_API_VERSION"),
+        description="API version for the Azure OpenAI service (e.g., 2024-12-01-preview)",
+    )
+    api_type: str | None = Field(
+        default_factory=lambda: os.getenv("AZURE_API_TYPE", "azure"),
+        description="API type for the Azure OpenAI service (e.g., azure)",
+    )
+
+ @classmethod
+ def sample_run_config(
+ cls,
+ api_key: str = "${env.AZURE_API_KEY:=}",
+ api_base: str = "${env.AZURE_API_BASE:=}",
+ api_version: str = "${env.AZURE_API_VERSION:=}",
+ api_type: str = "${env.AZURE_API_TYPE:=}",
+ **kwargs,
+ ) -> dict[str, Any]:
+ return {
+ "api_key": api_key,
+ "api_base": api_base,
+ "api_version": api_version,
+ "api_type": api_type,
+ }
diff --git a/llama_stack/providers/remote/inference/azure/models.py b/llama_stack/providers/remote/inference/azure/models.py
new file mode 100644
index 000000000..64c87969b
--- /dev/null
+++ b/llama_stack/providers/remote/inference/azure/models.py
@@ -0,0 +1,28 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.providers.utils.inference.model_registry import (
+ ProviderModelEntry,
+)
+
+# https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions
+LLM_MODEL_IDS = [
+ "gpt-5",
+ "gpt-5-mini",
+ "gpt-5-nano",
+ "gpt-5-chat",
+ "o1",
+ "o1-mini",
+ "o3-mini",
+ "o4-mini",
+ "gpt-4.1",
+ "gpt-4.1-mini",
+ "gpt-4.1-nano",
+]
+
+SAFETY_MODELS_ENTRIES: list[ProviderModelEntry] = []
+
+MODEL_ENTRIES = [ProviderModelEntry(provider_model_id=m) for m in LLM_MODEL_IDS] + SAFETY_MODELS_ENTRIES
diff --git a/llama_stack/providers/remote/inference/bedrock/bedrock.py b/llama_stack/providers/remote/inference/bedrock/bedrock.py
index 63ea196f6..106caed9b 100644
--- a/llama_stack/providers/remote/inference/bedrock/bedrock.py
+++ b/llama_stack/providers/remote/inference/bedrock/bedrock.py
@@ -53,6 +53,43 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
from .models import MODEL_ENTRIES
+REGION_PREFIX_MAP = {
+ "us": "us.",
+ "eu": "eu.",
+ "ap": "ap.",
+}
+
+
+def _get_region_prefix(region: str | None) -> str:
+ # AWS requires region prefixes for inference profiles
+ if region is None:
+ return "us." # default to US when we don't know
+
+ # Handle case insensitive region matching
+ region_lower = region.lower()
+ for prefix in REGION_PREFIX_MAP:
+ if region_lower.startswith(f"{prefix}-"):
+ return REGION_PREFIX_MAP[prefix]
+
+ # Fallback to US for anything we don't recognize
+ return "us."
+
+
+def _to_inference_profile_id(model_id: str, region: str | None = None) -> str:
+ # Return ARNs unchanged
+ if model_id.startswith("arn:"):
+ return model_id
+
+ # Return inference profile IDs that already have regional prefixes
+ if any(model_id.startswith(p) for p in REGION_PREFIX_MAP.values()):
+ return model_id
+
+ # Default to US East when no region is provided
+ if region is None:
+ region = "us-east-1"
+
+ return _get_region_prefix(region) + model_id
+
class BedrockInferenceAdapter(
ModelRegistryHelper,
@@ -166,8 +203,13 @@ class BedrockInferenceAdapter(
options["repetition_penalty"] = sampling_params.repetition_penalty
prompt = await chat_completion_request_to_prompt(request, self.get_llama_model(request.model))
+
+ # Convert foundation model ID to inference profile ID
+ region_name = self.client.meta.region_name
+ inference_profile_id = _to_inference_profile_id(bedrock_model, region_name)
+
return {
- "modelId": bedrock_model,
+ "modelId": inference_profile_id,
"body": json.dumps(
{
"prompt": prompt,
@@ -185,6 +227,11 @@ class BedrockInferenceAdapter(
task_type: EmbeddingTaskType | None = None,
) -> EmbeddingsResponse:
model = await self.model_store.get_model(model_id)
+
+ # Convert foundation model ID to inference profile ID
+ region_name = self.client.meta.region_name
+ inference_profile_id = _to_inference_profile_id(model.provider_resource_id, region_name)
+
embeddings = []
for content in contents:
assert not content_has_media(content), "Bedrock does not support media for embeddings"
@@ -193,7 +240,7 @@ class BedrockInferenceAdapter(
body = json.dumps(input_body)
response = self.client.invoke_model(
body=body,
- modelId=model.provider_resource_id,
+ modelId=inference_profile_id,
accept="application/json",
contentType="application/json",
)
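The inference-profile mapping is self-contained, so its behavior can be pinned down with a few illustrative checks (the model ids below are examples only):

from llama_stack.providers.remote.inference.bedrock.bedrock import (
    _to_inference_profile_id,
)

# The region prefix is derived from the client's AWS region.
assert (
    _to_inference_profile_id("meta.llama3-1-8b-instruct-v1:0", "eu-west-1")
    == "eu.meta.llama3-1-8b-instruct-v1:0"
)
# ARNs and already-prefixed profile ids pass through unchanged.
arn = "arn:aws:bedrock:us-east-1:123456789012:inference-profile/example"
assert _to_inference_profile_id(arn) == arn
assert (
    _to_inference_profile_id("us.meta.llama3-1-8b-instruct-v1:0")
    == "us.meta.llama3-1-8b-instruct-v1:0"
)
# Missing or unrecognized regions fall back to the "us." prefix.
assert (
    _to_inference_profile_id("meta.llama3-1-8b-instruct-v1:0")
    == "us.meta.llama3-1-8b-instruct-v1:0"
)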
diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py
index 9e9a80ca5..77f5d82af 100644
--- a/llama_stack/providers/remote/inference/vllm/vllm.py
+++ b/llama_stack/providers/remote/inference/vllm/vllm.py
@@ -4,7 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import json
-from collections.abc import AsyncGenerator, AsyncIterator
+from collections.abc import AsyncGenerator
from typing import Any
import httpx
@@ -38,13 +38,6 @@ from llama_stack.apis.inference import (
LogProbConfig,
Message,
ModelStore,
- OpenAIChatCompletion,
- OpenAICompletion,
- OpenAIEmbeddingData,
- OpenAIEmbeddingsResponse,
- OpenAIEmbeddingUsage,
- OpenAIMessageParam,
- OpenAIResponseFormatParam,
ResponseFormat,
SamplingParams,
TextTruncation,
@@ -71,11 +64,11 @@ from llama_stack.providers.utils.inference.openai_compat import (
convert_message_to_openai_dict,
convert_tool_call,
get_sampling_options,
- prepare_openai_completion_params,
process_chat_completion_stream_response,
process_completion_response,
process_completion_stream_response,
)
+from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from llama_stack.providers.utils.inference.prompt_adapter import (
completion_request_to_prompt,
content_has_media,
@@ -288,7 +281,7 @@ async def _process_vllm_chat_completion_stream_response(
yield c
-class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
+class VLLMInferenceAdapter(OpenAIMixin, Inference, ModelsProtocolPrivate):
# automatically set by the resolver when instantiating the provider
__provider_id__: str
model_store: ModelStore | None = None
@@ -296,7 +289,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
def __init__(self, config: VLLMInferenceAdapterConfig) -> None:
self.register_helper = ModelRegistryHelper(build_hf_repo_model_entries())
self.config = config
- self.client = None
async def initialize(self) -> None:
if not self.config.url:
@@ -308,8 +300,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
return self.config.refresh_models
async def list_models(self) -> list[Model] | None:
- self._lazy_initialize_client()
- assert self.client is not None # mypy
models = []
async for m in self.client.models.list():
model_type = ModelType.llm # unclear how to determine embedding vs. llm models
@@ -340,8 +330,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
HealthResponse: A dictionary containing the health status.
"""
try:
- client = self._create_client() if self.client is None else self.client
- _ = [m async for m in client.models.list()] # Ensure the client is initialized
+            _ = [m async for m in self.client.models.list()]  # simple request to verify the server is reachable
return HealthResponse(status=HealthStatus.OK)
except Exception as e:
return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
@@ -351,19 +340,14 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
raise ValueError("Model store not set")
return await self.model_store.get_model(model_id)
- def _lazy_initialize_client(self):
- if self.client is not None:
- return
+ def get_api_key(self):
+ return self.config.api_token
- log.info(f"Initializing vLLM client with base_url={self.config.url}")
- self.client = self._create_client()
+ def get_base_url(self):
+ return self.config.url
- def _create_client(self):
- return AsyncOpenAI(
- base_url=self.config.url,
- api_key=self.config.api_token,
- http_client=httpx.AsyncClient(verify=self.config.tls_verify),
- )
+ def get_extra_client_params(self):
+ return {"http_client": httpx.AsyncClient(verify=self.config.tls_verify)}
async def completion(
self,
@@ -374,7 +358,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
stream: bool | None = False,
logprobs: LogProbConfig | None = None,
) -> CompletionResponse | AsyncGenerator[CompletionResponseStreamChunk, None]:
- self._lazy_initialize_client()
if sampling_params is None:
sampling_params = SamplingParams()
model = await self._get_model(model_id)
@@ -406,7 +389,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
logprobs: LogProbConfig | None = None,
tool_config: ToolConfig | None = None,
) -> ChatCompletionResponse | AsyncGenerator[ChatCompletionResponseStreamChunk, None]:
- self._lazy_initialize_client()
if sampling_params is None:
sampling_params = SamplingParams()
model = await self._get_model(model_id)
@@ -479,16 +461,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
yield chunk
async def register_model(self, model: Model) -> Model:
- # register_model is called during Llama Stack initialization, hence we cannot init self.client if not initialized yet.
- # self.client should only be created after the initialization is complete to avoid asyncio cross-context errors.
- # Changing this may lead to unpredictable behavior.
- client = self._create_client() if self.client is None else self.client
try:
model = await self.register_helper.register_model(model)
except ValueError:
pass # Ignore statically unknown model, will check live listing
try:
- res = await client.models.list()
+ res = await self.client.models.list()
except APIConnectionError as e:
raise ValueError(
f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL."
@@ -543,8 +521,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
output_dimension: int | None = None,
task_type: EmbeddingTaskType | None = None,
) -> EmbeddingsResponse:
- self._lazy_initialize_client()
- assert self.client is not None
model = await self._get_model(model_id)
kwargs = {}
@@ -560,154 +536,3 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
embeddings = [data.embedding for data in response.data]
return EmbeddingsResponse(embeddings=embeddings)
-
- async def openai_embeddings(
- self,
- model: str,
- input: str | list[str],
- encoding_format: str | None = "float",
- dimensions: int | None = None,
- user: str | None = None,
- ) -> OpenAIEmbeddingsResponse:
- self._lazy_initialize_client()
- assert self.client is not None
- model_obj = await self._get_model(model)
- assert model_obj.model_type == ModelType.embedding
-
- # Convert input to list if it's a string
- input_list = [input] if isinstance(input, str) else input
-
- # Call vLLM embeddings endpoint with encoding_format
- response = await self.client.embeddings.create(
- model=model_obj.provider_resource_id,
- input=input_list,
- dimensions=dimensions,
- encoding_format=encoding_format,
- )
-
- # Convert response to OpenAI format
- data = [
- OpenAIEmbeddingData(
- embedding=embedding_data.embedding,
- index=i,
- )
- for i, embedding_data in enumerate(response.data)
- ]
-
- # Not returning actual token usage since vLLM doesn't provide it
- usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1)
-
- return OpenAIEmbeddingsResponse(
- data=data,
- model=model_obj.provider_resource_id,
- usage=usage,
- )
-
- async def openai_completion(
- self,
- model: str,
- prompt: str | list[str] | list[int] | list[list[int]],
- best_of: int | None = None,
- echo: bool | None = None,
- frequency_penalty: float | None = None,
- logit_bias: dict[str, float] | None = None,
- logprobs: bool | None = None,
- max_tokens: int | None = None,
- n: int | None = None,
- presence_penalty: float | None = None,
- seed: int | None = None,
- stop: str | list[str] | None = None,
- stream: bool | None = None,
- stream_options: dict[str, Any] | None = None,
- temperature: float | None = None,
- top_p: float | None = None,
- user: str | None = None,
- guided_choice: list[str] | None = None,
- prompt_logprobs: int | None = None,
- suffix: str | None = None,
- ) -> OpenAICompletion:
- self._lazy_initialize_client()
- model_obj = await self._get_model(model)
-
- extra_body: dict[str, Any] = {}
- if prompt_logprobs is not None and prompt_logprobs >= 0:
- extra_body["prompt_logprobs"] = prompt_logprobs
- if guided_choice:
- extra_body["guided_choice"] = guided_choice
-
- params = await prepare_openai_completion_params(
- model=model_obj.provider_resource_id,
- prompt=prompt,
- best_of=best_of,
- echo=echo,
- frequency_penalty=frequency_penalty,
- logit_bias=logit_bias,
- logprobs=logprobs,
- max_tokens=max_tokens,
- n=n,
- presence_penalty=presence_penalty,
- seed=seed,
- stop=stop,
- stream=stream,
- stream_options=stream_options,
- temperature=temperature,
- top_p=top_p,
- user=user,
- extra_body=extra_body,
- )
- return await self.client.completions.create(**params) # type: ignore
-
- async def openai_chat_completion(
- self,
- model: str,
- messages: list[OpenAIMessageParam],
- frequency_penalty: float | None = None,
- function_call: str | dict[str, Any] | None = None,
- functions: list[dict[str, Any]] | None = None,
- logit_bias: dict[str, float] | None = None,
- logprobs: bool | None = None,
- max_completion_tokens: int | None = None,
- max_tokens: int | None = None,
- n: int | None = None,
- parallel_tool_calls: bool | None = None,
- presence_penalty: float | None = None,
- response_format: OpenAIResponseFormatParam | None = None,
- seed: int | None = None,
- stop: str | list[str] | None = None,
- stream: bool | None = None,
- stream_options: dict[str, Any] | None = None,
- temperature: float | None = None,
- tool_choice: str | dict[str, Any] | None = None,
- tools: list[dict[str, Any]] | None = None,
- top_logprobs: int | None = None,
- top_p: float | None = None,
- user: str | None = None,
- ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
- self._lazy_initialize_client()
- model_obj = await self._get_model(model)
- params = await prepare_openai_completion_params(
- model=model_obj.provider_resource_id,
- messages=messages,
- frequency_penalty=frequency_penalty,
- function_call=function_call,
- functions=functions,
- logit_bias=logit_bias,
- logprobs=logprobs,
- max_completion_tokens=max_completion_tokens,
- max_tokens=max_tokens,
- n=n,
- parallel_tool_calls=parallel_tool_calls,
- presence_penalty=presence_penalty,
- response_format=response_format,
- seed=seed,
- stop=stop,
- stream=stream,
- stream_options=stream_options,
- temperature=temperature,
- tool_choice=tool_choice,
- tools=tools,
- top_logprobs=top_logprobs,
- top_p=top_p,
- user=user,
- )
- return await self.client.chat.completions.create(**params) # type: ignore
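The vLLM adapter now inherits its client from OpenAIMixin instead of lazily building one. The contract reduces to three small hooks, sketched here on a hypothetical adapter (any additional abstract methods on the mixin are elided, and the class is never instantiated here):

import httpx

from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin


class ExampleAdapter(OpenAIMixin):  # hypothetical; mirrors the vLLM hooks
    def __init__(self, url: str, api_token: str, tls_verify: bool = True) -> None:
        self.url = url
        self.api_token = api_token
        self.tls_verify = tls_verify

    def get_api_key(self) -> str:
        return self.api_token

    def get_base_url(self) -> str:
        return self.url

    def get_extra_client_params(self) -> dict:
        # Same trick as the vLLM adapter: control TLS verification per deployment.
        return {"http_client": httpx.AsyncClient(verify=self.tls_verify)}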
diff --git a/llama_stack/providers/utils/inference/inference_store.py b/llama_stack/providers/utils/inference/inference_store.py
index 43006cfd5..17f4c6268 100644
--- a/llama_stack/providers/utils/inference/inference_store.py
+++ b/llama_stack/providers/utils/inference/inference_store.py
@@ -3,6 +3,11 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
+import asyncio
+from typing import Any
+
+from sqlalchemy.exc import IntegrityError
+
from llama_stack.apis.inference import (
ListOpenAIChatCompletionResponse,
OpenAIChatCompletion,
@@ -10,24 +15,43 @@ from llama_stack.apis.inference import (
OpenAIMessageParam,
Order,
)
-from llama_stack.core.datatypes import AccessRule
-from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR
+from llama_stack.core.datatypes import AccessRule, InferenceStoreConfig
+from llama_stack.log import get_logger
from ..sqlstore.api import ColumnDefinition, ColumnType
from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
-from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, sqlstore_impl
+from ..sqlstore.sqlstore import SqlStoreConfig, SqlStoreType, sqlstore_impl
+
+logger = get_logger(name=__name__, category="inference_store")
class InferenceStore:
- def __init__(self, sql_store_config: SqlStoreConfig, policy: list[AccessRule]):
- if not sql_store_config:
- sql_store_config = SqliteSqlStoreConfig(
- db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(),
+ def __init__(
+ self,
+ config: InferenceStoreConfig | SqlStoreConfig,
+ policy: list[AccessRule],
+ ):
+ # Handle backward compatibility
+ if not isinstance(config, InferenceStoreConfig):
+ # Legacy: SqlStoreConfig passed directly as config
+ config = InferenceStoreConfig(
+ sql_store_config=config,
)
- self.sql_store_config = sql_store_config
+
+ self.config = config
+ self.sql_store_config = config.sql_store_config
self.sql_store = None
self.policy = policy
+ # Disable write queue for SQLite to avoid concurrency issues
+ self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite
+
+ # Async write queue and worker control
+ self._queue: asyncio.Queue[tuple[OpenAIChatCompletion, list[OpenAIMessageParam]]] | None = None
+ self._worker_tasks: list[asyncio.Task[Any]] = []
+ self._max_write_queue_size: int = config.max_write_queue_size
+ self._num_writers: int = max(1, config.num_writers)
+
async def initialize(self):
"""Create the necessary tables if they don't exist."""
self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config))
@@ -42,23 +66,109 @@ class InferenceStore:
},
)
+ if self.enable_write_queue:
+ self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
+ for _ in range(self._num_writers):
+ self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
+ else:
+ logger.info("Write queue disabled for SQLite to avoid concurrency issues")
+
+ async def shutdown(self) -> None:
+ if not self._worker_tasks:
+ return
+ if self._queue is not None:
+ await self._queue.join()
+ for t in self._worker_tasks:
+ if not t.done():
+ t.cancel()
+ for t in self._worker_tasks:
+ try:
+ await t
+ except asyncio.CancelledError:
+ pass
+ self._worker_tasks.clear()
+
+ async def flush(self) -> None:
+ """Wait for all queued writes to complete. Useful for testing."""
+ if self.enable_write_queue and self._queue is not None:
+ await self._queue.join()
+
async def store_chat_completion(
self, chat_completion: OpenAIChatCompletion, input_messages: list[OpenAIMessageParam]
) -> None:
- if not self.sql_store:
+ if self.enable_write_queue:
+ if self._queue is None:
+ raise ValueError("Inference store is not initialized")
+ try:
+ self._queue.put_nowait((chat_completion, input_messages))
+ except asyncio.QueueFull:
+ logger.warning(
+                    f"Write queue full; waiting to enqueue chat completion id={getattr(chat_completion, 'id', '')}"
+ )
+ await self._queue.put((chat_completion, input_messages))
+ else:
+ await self._write_chat_completion(chat_completion, input_messages)
+
+ async def _worker_loop(self) -> None:
+ assert self._queue is not None
+ while True:
+ try:
+ item = await self._queue.get()
+ except asyncio.CancelledError:
+ break
+ chat_completion, input_messages = item
+ try:
+ await self._write_chat_completion(chat_completion, input_messages)
+ except Exception as e: # noqa: BLE001
+ logger.error(f"Error writing chat completion: {e}")
+ finally:
+ self._queue.task_done()
+
+ async def _write_chat_completion(
+ self, chat_completion: OpenAIChatCompletion, input_messages: list[OpenAIMessageParam]
+ ) -> None:
+ if self.sql_store is None:
raise ValueError("Inference store is not initialized")
data = chat_completion.model_dump()
+ record_data = {
+ "id": data["id"],
+ "created": data["created"],
+ "model": data["model"],
+ "choices": data["choices"],
+ "input_messages": [message.model_dump() for message in input_messages],
+ }
- await self.sql_store.insert(
- table="chat_completions",
- data={
- "id": data["id"],
- "created": data["created"],
- "model": data["model"],
- "choices": data["choices"],
- "input_messages": [message.model_dump() for message in input_messages],
- },
+ try:
+ await self.sql_store.insert(
+ table="chat_completions",
+ data=record_data,
+ )
+ except IntegrityError as e:
+            # Duplicate chat completion IDs can be generated during tests, especially when
+            # recorded responses are replayed across different tests, so there is no need to
+            # warn or error in that case. In production we have seen no evidence of duplicate
+            # IDs, so upserting here does not hide a real problem.
+
+ # Check if it's a unique constraint violation
+ error_message = str(e.orig) if e.orig else str(e)
+ if self._is_unique_constraint_error(error_message):
+ # Update the existing record instead
+ await self.sql_store.update(table="chat_completions", data=record_data, where={"id": data["id"]})
+ else:
+ # Re-raise if it's not a unique constraint error
+ raise
+
+ def _is_unique_constraint_error(self, error_message: str) -> bool:
+ """Check if the error is specifically a unique constraint violation."""
+ error_lower = error_message.lower()
+ return any(
+ indicator in error_lower
+ for indicator in [
+ "unique constraint failed", # SQLite
+ "duplicate key", # PostgreSQL
+ "unique violation", # PostgreSQL alternative
+ "duplicate entry", # MySQL
+ ]
)
async def list_chat_completions(
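Because writes are now queued, a read issued immediately after store_chat_completion may miss the record. A test-oriented sketch (the store and completion objects are assumed to exist):

async def store_and_read_back(store, chat_completion, input_messages):
    # Non-blocking enqueue; a background writer persists it later.
    await store.store_chat_completion(chat_completion, input_messages)
    # Deterministic reads in tests: drain the queue before querying.
    await store.flush()
    return await store.list_chat_completions()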
diff --git a/llama_stack/providers/utils/inference/openai_mixin.py b/llama_stack/providers/utils/inference/openai_mixin.py
index f60deee6e..a3c0ffadc 100644
--- a/llama_stack/providers/utils/inference/openai_mixin.py
+++ b/llama_stack/providers/utils/inference/openai_mixin.py
@@ -67,6 +67,17 @@ class OpenAIMixin(ABC):
"""
pass
+ def get_extra_client_params(self) -> dict[str, Any]:
+ """
+ Get any extra parameters to pass to the AsyncOpenAI client.
+
+ Child classes can override this method to provide additional parameters
+ such as timeout settings, proxies, etc.
+
+ :return: A dictionary of extra parameters
+ """
+ return {}
+
@property
def client(self) -> AsyncOpenAI:
"""
@@ -78,6 +89,7 @@ class OpenAIMixin(ABC):
return AsyncOpenAI(
api_key=self.get_api_key(),
base_url=self.get_base_url(),
+ **self.get_extra_client_params(),
)
async def _get_provider_model_id(self, model: str) -> str:
@@ -124,10 +136,15 @@ class OpenAIMixin(ABC):
"""
Direct OpenAI completion API call.
"""
- if guided_choice is not None:
- logger.warning("guided_choice is not supported by the OpenAI API. Ignoring.")
- if prompt_logprobs is not None:
- logger.warning("prompt_logprobs is not supported by the OpenAI API. Ignoring.")
+        # Handle parameters the OpenAI API does not support but the provider might;
+        # vLLM, for example, accepts both prompt_logprobs and guided_choice via extra_body.
+ # TODO: test coverage
+ extra_body: dict[str, Any] = {}
+ if prompt_logprobs is not None and prompt_logprobs >= 0:
+ extra_body["prompt_logprobs"] = prompt_logprobs
+ if guided_choice:
+ extra_body["guided_choice"] = guided_choice
# TODO: fix openai_completion to return type compatible with OpenAI's API response
return await self.client.completions.create( # type: ignore[no-any-return]
@@ -150,7 +167,8 @@ class OpenAIMixin(ABC):
top_p=top_p,
user=user,
suffix=suffix,
- )
+ ),
+ extra_body=extra_body,
)
async def openai_chat_completion(
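A stand-alone sketch of the extra_body passthrough using the OpenAI client directly; the base URL, model name, and the two vLLM-only fields are assumptions about the target server:

from openai import AsyncOpenAI


async def complete_with_vllm_extras(prompt: str) -> str:
    client = AsyncOpenAI(base_url="http://localhost:8000/v1", api_key="none")
    response = await client.completions.create(
        model="my-model",
        prompt=prompt,
        # Fields the OpenAI SDK does not model are forwarded verbatim in the
        # request JSON; vLLM understands both of these.
        extra_body={"guided_choice": ["yes", "no"], "prompt_logprobs": 0},
    )
    return response.choices[0].text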
diff --git a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py b/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py
index 867ba2f55..acb688f96 100644
--- a/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py
+++ b/llama_stack/providers/utils/sqlstore/authorized_sqlstore.py
@@ -172,6 +172,20 @@ class AuthorizedSqlStore:
return results.data[0] if results.data else None
+ async def update(self, table: str, data: Mapping[str, Any], where: Mapping[str, Any]) -> None:
+ """Update rows with automatic access control attribute capture."""
+ enhanced_data = dict(data)
+
+ current_user = get_authenticated_user()
+ if current_user:
+ enhanced_data["owner_principal"] = current_user.principal
+ enhanced_data["access_attributes"] = current_user.attributes
+ else:
+ enhanced_data["owner_principal"] = None
+ enhanced_data["access_attributes"] = None
+
+ await self.sql_store.update(table, enhanced_data, where)
+
async def delete(self, table: str, where: Mapping[str, Any]) -> None:
"""Delete rows with automatic access control filtering."""
await self.sql_store.delete(table, where)
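Usage sketch for the new update() path (store setup elided; the record shape follows the chat_completions table defined earlier in this diff):

async def upsert_chat_completion(store, record: dict) -> None:
    # Like insert(), update() stamps owner_principal and access_attributes
    # from the current authenticated user (or None when unauthenticated).
    await store.update(
        table="chat_completions",
        data=record,
        where={"id": record["id"]},
    )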
diff --git a/llama_stack/providers/utils/telemetry/tracing.py b/llama_stack/providers/utils/telemetry/tracing.py
index 7694003b5..9969b1055 100644
--- a/llama_stack/providers/utils/telemetry/tracing.py
+++ b/llama_stack/providers/utils/telemetry/tracing.py
@@ -18,6 +18,7 @@ from functools import wraps
from typing import Any
from llama_stack.apis.telemetry import (
+ Event,
LogSeverity,
Span,
SpanEndPayload,
@@ -98,7 +99,7 @@ class BackgroundLogger:
def __init__(self, api: Telemetry, capacity: int = 100000):
self.api = api
self.log_queue: queue.Queue[Any] = queue.Queue(maxsize=capacity)
- self.worker_thread = threading.Thread(target=self._process_logs, daemon=True)
+ self.worker_thread = threading.Thread(target=self._worker, daemon=True)
self.worker_thread.start()
self._last_queue_full_log_time: float = 0.0
self._dropped_since_last_notice: int = 0
@@ -118,12 +119,16 @@ class BackgroundLogger:
self._last_queue_full_log_time = current_time
self._dropped_since_last_notice = 0
- def _process_logs(self):
+ def _worker(self):
+ loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(loop)
+ loop.run_until_complete(self._process_logs())
+
+ async def _process_logs(self):
while True:
try:
event = self.log_queue.get()
- # figure out how to use a thread's native loop
- asyncio.run(self.api.log_event(event))
+ await self.api.log_event(event)
except Exception:
import traceback
@@ -136,6 +141,19 @@ class BackgroundLogger:
self.log_queue.join()
+def enqueue_event(event: Event) -> None:
+    """Enqueue a telemetry event to the background logger.
+
+ This provides a non-blocking path for routers and other hot paths to
+ submit telemetry without awaiting the Telemetry API, reducing contention
+ with the main event loop.
+ """
+ global BACKGROUND_LOGGER
+ if BACKGROUND_LOGGER is None:
+ raise RuntimeError("Telemetry API not initialized")
+ BACKGROUND_LOGGER.log_event(event)
+
+
class TraceContext:
spans: list[Span] = []
@@ -256,11 +274,7 @@ class TelemetryHandler(logging.Handler):
if record.module in ("asyncio", "selector_events"):
return
- global CURRENT_TRACE_CONTEXT, BACKGROUND_LOGGER
-
- if BACKGROUND_LOGGER is None:
- raise RuntimeError("Telemetry API not initialized")
-
+ global CURRENT_TRACE_CONTEXT
context = CURRENT_TRACE_CONTEXT.get()
if context is None:
return
@@ -269,7 +283,7 @@ class TelemetryHandler(logging.Handler):
if span is None:
return
- BACKGROUND_LOGGER.log_event(
+ enqueue_event(
UnstructuredLogEvent(
trace_id=span.trace_id,
span_id=span.span_id,
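The worker change above boils down to one pattern: a daemon thread owns a single private event loop and drains a thread-safe queue on it, instead of spinning up a fresh loop per event with asyncio.run(). A minimal reproduction with no llama_stack dependencies:

import asyncio
import queue
import threading


class BackgroundWorker:
    def __init__(self) -> None:
        self.q: queue.Queue = queue.Queue(maxsize=1000)
        threading.Thread(target=self._worker, daemon=True).start()

    def _worker(self) -> None:
        # One loop for the thread's lifetime; created once, reused forever.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(self._process())

    async def _process(self) -> None:
        while True:
            # Blocking get() is fine here: this loop runs nothing else.
            item = self.q.get()
            try:
                await self._handle(item)
            finally:
                self.q.task_done()

    async def _handle(self, item) -> None:
        print("handled", item)  # stand-in for api.log_event(event)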
diff --git a/llama_stack/providers/utils/vector_io/vector_utils.py b/llama_stack/providers/utils/vector_io/vector_utils.py
index e55ac75ae..324f35405 100644
--- a/llama_stack/providers/utils/vector_io/vector_utils.py
+++ b/llama_stack/providers/utils/vector_io/vector_utils.py
@@ -12,14 +12,12 @@ import uuid
def generate_chunk_id(document_id: str, chunk_text: str, chunk_window: str | None = None) -> str:
"""
Generate a unique chunk ID using a hash of the document ID and chunk text.
-
- Note: MD5 is used only to calculate an identifier, not for security purposes.
- Adding usedforsecurity=False for compatibility with FIPS environments.
+    The first 32 hex characters of the SHA-256 digest form the UUID.
"""
hash_input = f"{document_id}:{chunk_text}".encode()
if chunk_window:
hash_input += f":{chunk_window}".encode()
- return str(uuid.UUID(hashlib.md5(hash_input, usedforsecurity=False).hexdigest()))
+ return str(uuid.UUID(hashlib.sha256(hash_input).hexdigest()[:32]))
def proper_case(s: str) -> str:
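The new derivation is easy to verify in isolation: a SHA-256 hex digest is 64 characters long, and its first 32 hex characters are exactly the input a UUID string needs.

import hashlib
import uuid

hash_input = b"doc-1:some chunk text"  # illustrative document id + chunk text
digest = hashlib.sha256(hash_input).hexdigest()
chunk_id = str(uuid.UUID(digest[:32]))
print(chunk_id)  # stable for identical (document_id, chunk_text) pairs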
diff --git a/llama_stack/testing/inference_recorder.py b/llama_stack/testing/inference_recorder.py
index 298758c92..674016fb1 100644
--- a/llama_stack/testing/inference_recorder.py
+++ b/llama_stack/testing/inference_recorder.py
@@ -15,6 +15,8 @@ from enum import StrEnum
from pathlib import Path
from typing import Any, Literal, cast
+from openai import NOT_GIVEN
+
from llama_stack.log import get_logger
logger = get_logger(__name__, category="testing")
@@ -105,8 +107,12 @@ def _deserialize_response(data: dict[str, Any]) -> Any:
return cls.model_validate(data["__data__"])
except (ImportError, AttributeError, TypeError, ValueError) as e:
- logger.warning(f"Failed to deserialize object of type {data['__type__']}: {e}")
- return data["__data__"]
+ logger.warning(f"Failed to deserialize object of type {data['__type__']} with model_validate: {e}")
+ try:
+ return cls.model_construct(**data["__data__"])
+ except Exception as e:
+ logger.warning(f"Failed to deserialize object of type {data['__type__']} with model_construct: {e}")
+ return data["__data__"]
return data
@@ -194,20 +200,15 @@ def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str:
Supported endpoints:
- '/api/tags' (Ollama): response body has 'models': [ { name/model/digest/id/... }, ... ]
- - '/v1/models' (OpenAI): response body has 'data': [ { id: ... }, ... ]
+    - '/v1/models' (OpenAI): response body is a list: [ { id: ... }, ... ]
Returns a list of unique identifiers or None if structure doesn't match.
"""
- body = response["body"]
- if endpoint == "/api/tags":
- items = body.get("models")
- idents = [m.model for m in items]
- else:
- items = body.get("data")
- idents = [m.id for m in items]
+ items = response["body"]
+ idents = [m.model if endpoint == "/api/tags" else m.id for m in items]
return sorted(set(idents))
identifiers = _extract_model_identifiers()
- return hashlib.sha1(("|".join(identifiers)).encode("utf-8")).hexdigest()[:8]
+ return hashlib.sha256(("|".join(identifiers)).encode("utf-8")).hexdigest()[:8]
def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]]) -> dict[str, Any] | None:
@@ -215,28 +216,22 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]])
seen: dict[str, dict[str, Any]] = {}
for rec in records:
body = rec["response"]["body"]
- if endpoint == "/api/tags":
- items = body.models
- elif endpoint == "/v1/models":
- items = body.data
- else:
- items = []
-
- for m in items:
- if endpoint == "/v1/models":
+ if endpoint == "/v1/models":
+ for m in body:
key = m.id
- else:
+ seen[key] = m
+ elif endpoint == "/api/tags":
+ for m in body.models:
key = m.model
- seen[key] = m
+ seen[key] = m
ordered = [seen[k] for k in sorted(seen.keys())]
canonical = records[0]
canonical_req = canonical.get("request", {})
if isinstance(canonical_req, dict):
canonical_req["endpoint"] = endpoint
- if endpoint == "/v1/models":
- body = {"data": ordered, "object": "list"}
- else:
+ body = ordered
+ if endpoint == "/api/tags":
from ollama import ListResponse
body = ListResponse(models=ordered)
@@ -247,12 +242,17 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
global _current_mode, _current_storage
if _current_mode == InferenceMode.LIVE or _current_storage is None:
- # Normal operation
- return await original_method(self, *args, **kwargs)
+ if endpoint == "/v1/models":
+ return original_method(self, *args, **kwargs)
+ else:
+ return await original_method(self, *args, **kwargs)
# Get base URL based on client type
if client_type == "openai":
base_url = str(self._client.base_url)
+
+ # the OpenAI client methods may pass NOT_GIVEN for unset parameters; filter these out
+ kwargs = {k: v for k, v in kwargs.items() if v is not NOT_GIVEN}
elif client_type == "ollama":
# Get base URL from the client (Ollama client uses host attribute)
base_url = getattr(self, "host", "http://localhost:11434")
@@ -296,7 +296,14 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
)
elif _current_mode == InferenceMode.RECORD:
- response = await original_method(self, *args, **kwargs)
+ if endpoint == "/v1/models":
+ response = original_method(self, *args, **kwargs)
+ else:
+ response = await original_method(self, *args, **kwargs)
+
+ # we want to store the result of the iterator, not the iterator itself
+ if endpoint == "/v1/models":
+ response = [m async for m in response]
request_data = {
"method": method,
@@ -376,10 +383,14 @@ def patch_inference_clients():
_original_methods["embeddings_create"], self, "openai", "/v1/embeddings", *args, **kwargs
)
- async def patched_models_list(self, *args, **kwargs):
- return await _patched_inference_method(
- _original_methods["models_list"], self, "openai", "/v1/models", *args, **kwargs
- )
+ def patched_models_list(self, *args, **kwargs):
+ async def _iter():
+ for item in await _patched_inference_method(
+ _original_methods["models_list"], self, "openai", "/v1/models", *args, **kwargs
+ ):
+ yield item
+
+ return _iter()
# Apply OpenAI patches
AsyncChatCompletions.create = patched_chat_completions_create
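The NOT_GIVEN filtering above can be seen in isolation; NOT_GIVEN is the OpenAI SDK's sentinel for omitted parameters, and leaving it in kwargs would otherwise leak into recorded request hashes (the kwargs dict here is illustrative):

from openai import NOT_GIVEN

kwargs = {"model": "gpt-4.1", "temperature": NOT_GIVEN, "stream": False}
kwargs = {k: v for k, v in kwargs.items() if v is not NOT_GIVEN}
assert kwargs == {"model": "gpt-4.1", "stream": False}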
diff --git a/llama_stack/ui/package-lock.json b/llama_stack/ui/package-lock.json
index 1db1c61cd..f333aa809 100644
--- a/llama_stack/ui/package-lock.json
+++ b/llama_stack/ui/package-lock.json
@@ -11,7 +11,7 @@
"@radix-ui/react-collapsible": "^1.1.12",
"@radix-ui/react-dialog": "^1.1.13",
"@radix-ui/react-dropdown-menu": "^2.1.16",
- "@radix-ui/react-select": "^2.2.5",
+ "@radix-ui/react-select": "^2.2.6",
"@radix-ui/react-separator": "^1.1.7",
"@radix-ui/react-slot": "^1.2.3",
"@radix-ui/react-tooltip": "^1.2.8",
@@ -20,7 +20,7 @@
"framer-motion": "^12.23.12",
"llama-stack-client": "^0.2.21",
"lucide-react": "^0.542.0",
- "next": "15.3.3",
+ "next": "15.5.3",
"next-auth": "^4.24.11",
"next-themes": "^0.4.6",
"react": "^19.0.0",
@@ -664,9 +664,9 @@
}
},
"node_modules/@emnapi/runtime": {
- "version": "1.4.3",
- "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.4.3.tgz",
- "integrity": "sha512-pBPWdu6MLKROBX05wSNKcNb++m5Er+KQ9QkB+WVM+pW2Kx9hoSrVTnu3BdkI5eBLZoKu/J6mW/B6i6bJB2ytXQ==",
+ "version": "1.5.0",
+ "resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.5.0.tgz",
+ "integrity": "sha512-97/BJ3iXHww3djw6hYIfErCZFee7qCtrneuLa20UXFCOTCfBM2cvQHjWJ2EG0s0MtdNwInarqCTz35i4wWXHsQ==",
"license": "MIT",
"optional": true,
"dependencies": {
@@ -927,9 +927,9 @@
}
},
"node_modules/@img/sharp-darwin-arm64": {
- "version": "0.34.1",
- "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.34.1.tgz",
- "integrity": "sha512-pn44xgBtgpEbZsu+lWf2KNb6OAf70X68k+yk69Ic2Xz11zHR/w24/U49XT7AeRwJ0Px+mhALhU5LPci1Aymk7A==",
+ "version": "0.34.3",
+ "resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.34.3.tgz",
+ "integrity": "sha512-ryFMfvxxpQRsgZJqBd4wsttYQbCxsJksrv9Lw/v798JcQ8+w84mBWuXwl+TT0WJ/WrYOLaYpwQXi3sA9nTIaIg==",
"cpu": [
"arm64"
],
@@ -945,13 +945,13 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
- "@img/sharp-libvips-darwin-arm64": "1.1.0"
+ "@img/sharp-libvips-darwin-arm64": "1.2.0"
}
},
"node_modules/@img/sharp-darwin-x64": {
- "version": "0.34.1",
- "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.1.tgz",
- "integrity": "sha512-VfuYgG2r8BpYiOUN+BfYeFo69nP/MIwAtSJ7/Zpxc5QF3KS22z8Pvg3FkrSFJBPNQ7mmcUcYQFBmEQp7eu1F8Q==",
+ "version": "0.34.3",
+ "resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.3.tgz",
+ "integrity": "sha512-yHpJYynROAj12TA6qil58hmPmAwxKKC7reUqtGLzsOHfP7/rniNGTL8tjWX6L3CTV4+5P4ypcS7Pp+7OB+8ihA==",
"cpu": [
"x64"
],
@@ -967,13 +967,13 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
- "@img/sharp-libvips-darwin-x64": "1.1.0"
+ "@img/sharp-libvips-darwin-x64": "1.2.0"
}
},
"node_modules/@img/sharp-libvips-darwin-arm64": {
- "version": "1.1.0",
- "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.1.0.tgz",
- "integrity": "sha512-HZ/JUmPwrJSoM4DIQPv/BfNh9yrOA8tlBbqbLz4JZ5uew2+o22Ik+tHQJcih7QJuSa0zo5coHTfD5J8inqj9DA==",
+ "version": "1.2.0",
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.2.0.tgz",
+ "integrity": "sha512-sBZmpwmxqwlqG9ueWFXtockhsxefaV6O84BMOrhtg/YqbTaRdqDE7hxraVE3y6gVM4eExmfzW4a8el9ArLeEiQ==",
"cpu": [
"arm64"
],
@@ -987,9 +987,9 @@
}
},
"node_modules/@img/sharp-libvips-darwin-x64": {
- "version": "1.1.0",
- "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.1.0.tgz",
- "integrity": "sha512-Xzc2ToEmHN+hfvsl9wja0RlnXEgpKNmftriQp6XzY/RaSfwD9th+MSh0WQKzUreLKKINb3afirxW7A0fz2YWuQ==",
+ "version": "1.2.0",
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.2.0.tgz",
+ "integrity": "sha512-M64XVuL94OgiNHa5/m2YvEQI5q2cl9d/wk0qFTDVXcYzi43lxuiFTftMR1tOnFQovVXNZJ5TURSDK2pNe9Yzqg==",
"cpu": [
"x64"
],
@@ -1003,9 +1003,9 @@
}
},
"node_modules/@img/sharp-libvips-linux-arm": {
- "version": "1.1.0",
- "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.1.0.tgz",
- "integrity": "sha512-s8BAd0lwUIvYCJyRdFqvsj+BJIpDBSxs6ivrOPm/R7piTs5UIwY5OjXrP2bqXC9/moGsyRa37eYWYCOGVXxVrA==",
+ "version": "1.2.0",
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.2.0.tgz",
+ "integrity": "sha512-mWd2uWvDtL/nvIzThLq3fr2nnGfyr/XMXlq8ZJ9WMR6PXijHlC3ksp0IpuhK6bougvQrchUAfzRLnbsen0Cqvw==",
"cpu": [
"arm"
],
@@ -1019,9 +1019,9 @@
}
},
"node_modules/@img/sharp-libvips-linux-arm64": {
- "version": "1.1.0",
- "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.1.0.tgz",
- "integrity": "sha512-IVfGJa7gjChDET1dK9SekxFFdflarnUB8PwW8aGwEoF3oAsSDuNUTYS+SKDOyOJxQyDC1aPFMuRYLoDInyV9Ew==",
+ "version": "1.2.0",
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.2.0.tgz",
+ "integrity": "sha512-RXwd0CgG+uPRX5YYrkzKyalt2OJYRiJQ8ED/fi1tq9WQW2jsQIn0tqrlR5l5dr/rjqq6AHAxURhj2DVjyQWSOA==",
"cpu": [
"arm64"
],
@@ -1035,9 +1035,9 @@
}
},
"node_modules/@img/sharp-libvips-linux-ppc64": {
- "version": "1.1.0",
- "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.1.0.tgz",
- "integrity": "sha512-tiXxFZFbhnkWE2LA8oQj7KYR+bWBkiV2nilRldT7bqoEZ4HiDOcePr9wVDAZPi/Id5fT1oY9iGnDq20cwUz8lQ==",
+ "version": "1.2.0",
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.2.0.tgz",
+ "integrity": "sha512-Xod/7KaDDHkYu2phxxfeEPXfVXFKx70EAFZ0qyUdOjCcxbjqyJOEUpDe6RIyaunGxT34Anf9ue/wuWOqBW2WcQ==",
"cpu": [
"ppc64"
],
@@ -1051,9 +1051,9 @@
}
},
"node_modules/@img/sharp-libvips-linux-s390x": {
- "version": "1.1.0",
- "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.1.0.tgz",
- "integrity": "sha512-xukSwvhguw7COyzvmjydRb3x/09+21HykyapcZchiCUkTThEQEOMtBj9UhkaBRLuBrgLFzQ2wbxdeCCJW/jgJA==",
+ "version": "1.2.0",
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.2.0.tgz",
+ "integrity": "sha512-eMKfzDxLGT8mnmPJTNMcjfO33fLiTDsrMlUVcp6b96ETbnJmd4uvZxVJSKPQfS+odwfVaGifhsB07J1LynFehw==",
"cpu": [
"s390x"
],
@@ -1067,9 +1067,9 @@
}
},
"node_modules/@img/sharp-libvips-linux-x64": {
- "version": "1.1.0",
- "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.1.0.tgz",
- "integrity": "sha512-yRj2+reB8iMg9W5sULM3S74jVS7zqSzHG3Ol/twnAAkAhnGQnpjj6e4ayUz7V+FpKypwgs82xbRdYtchTTUB+Q==",
+ "version": "1.2.0",
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.2.0.tgz",
+ "integrity": "sha512-ZW3FPWIc7K1sH9E3nxIGB3y3dZkpJlMnkk7z5tu1nSkBoCgw2nSRTFHI5pB/3CQaJM0pdzMF3paf9ckKMSE9Tg==",
"cpu": [
"x64"
],
@@ -1083,9 +1083,9 @@
}
},
"node_modules/@img/sharp-libvips-linuxmusl-arm64": {
- "version": "1.1.0",
- "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.1.0.tgz",
- "integrity": "sha512-jYZdG+whg0MDK+q2COKbYidaqW/WTz0cc1E+tMAusiDygrM4ypmSCjOJPmFTvHHJ8j/6cAGyeDWZOsK06tP33w==",
+ "version": "1.2.0",
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.2.0.tgz",
+ "integrity": "sha512-UG+LqQJbf5VJ8NWJ5Z3tdIe/HXjuIdo4JeVNADXBFuG7z9zjoegpzzGIyV5zQKi4zaJjnAd2+g2nna8TZvuW9Q==",
"cpu": [
"arm64"
],
@@ -1099,9 +1099,9 @@
}
},
"node_modules/@img/sharp-libvips-linuxmusl-x64": {
- "version": "1.1.0",
- "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.1.0.tgz",
- "integrity": "sha512-wK7SBdwrAiycjXdkPnGCPLjYb9lD4l6Ze2gSdAGVZrEL05AOUJESWU2lhlC+Ffn5/G+VKuSm6zzbQSzFX/P65A==",
+ "version": "1.2.0",
+ "resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.2.0.tgz",
+ "integrity": "sha512-SRYOLR7CXPgNze8akZwjoGBoN1ThNZoqpOgfnOxmWsklTGVfJiGJoC/Lod7aNMGA1jSsKWM1+HRX43OP6p9+6Q==",
"cpu": [
"x64"
],
@@ -1115,9 +1115,9 @@
}
},
"node_modules/@img/sharp-linux-arm": {
- "version": "0.34.1",
- "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.1.tgz",
- "integrity": "sha512-anKiszvACti2sGy9CirTlNyk7BjjZPiML1jt2ZkTdcvpLU1YH6CXwRAZCA2UmRXnhiIftXQ7+Oh62Ji25W72jA==",
+ "version": "0.34.3",
+ "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.3.tgz",
+ "integrity": "sha512-oBK9l+h6KBN0i3dC8rYntLiVfW8D8wH+NPNT3O/WBHeW0OQWCjfWksLUaPidsrDKpJgXp3G3/hkmhptAW0I3+A==",
"cpu": [
"arm"
],
@@ -1133,13 +1133,13 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
- "@img/sharp-libvips-linux-arm": "1.1.0"
+ "@img/sharp-libvips-linux-arm": "1.2.0"
}
},
"node_modules/@img/sharp-linux-arm64": {
- "version": "0.34.1",
- "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.1.tgz",
- "integrity": "sha512-kX2c+vbvaXC6vly1RDf/IWNXxrlxLNpBVWkdpRq5Ka7OOKj6nr66etKy2IENf6FtOgklkg9ZdGpEu9kwdlcwOQ==",
+ "version": "0.34.3",
+ "resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.3.tgz",
+ "integrity": "sha512-QdrKe3EvQrqwkDrtuTIjI0bu6YEJHTgEeqdzI3uWJOH6G1O8Nl1iEeVYRGdj1h5I21CqxSvQp1Yv7xeU3ZewbA==",
"cpu": [
"arm64"
],
@@ -1155,13 +1155,35 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
- "@img/sharp-libvips-linux-arm64": "1.1.0"
+ "@img/sharp-libvips-linux-arm64": "1.2.0"
+ }
+ },
+ "node_modules/@img/sharp-linux-ppc64": {
+ "version": "0.34.3",
+ "resolved": "https://registry.npmjs.org/@img/sharp-linux-ppc64/-/sharp-linux-ppc64-0.34.3.tgz",
+ "integrity": "sha512-GLtbLQMCNC5nxuImPR2+RgrviwKwVql28FWZIW1zWruy6zLgA5/x2ZXk3mxj58X/tszVF69KK0Is83V8YgWhLA==",
+ "cpu": [
+ "ppc64"
+ ],
+ "license": "Apache-2.0",
+ "optional": true,
+ "os": [
+ "linux"
+ ],
+ "engines": {
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+ },
+ "funding": {
+ "url": "https://opencollective.com/libvips"
+ },
+ "optionalDependencies": {
+ "@img/sharp-libvips-linux-ppc64": "1.2.0"
}
},
"node_modules/@img/sharp-linux-s390x": {
- "version": "0.34.1",
- "resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.1.tgz",
- "integrity": "sha512-7s0KX2tI9mZI2buRipKIw2X1ufdTeaRgwmRabt5bi9chYfhur+/C1OXg3TKg/eag1W+6CCWLVmSauV1owmRPxA==",
+ "version": "0.34.3",
+ "resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.3.tgz",
+ "integrity": "sha512-3gahT+A6c4cdc2edhsLHmIOXMb17ltffJlxR0aC2VPZfwKoTGZec6u5GrFgdR7ciJSsHT27BD3TIuGcuRT0KmQ==",
"cpu": [
"s390x"
],
@@ -1177,13 +1199,13 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
- "@img/sharp-libvips-linux-s390x": "1.1.0"
+ "@img/sharp-libvips-linux-s390x": "1.2.0"
}
},
"node_modules/@img/sharp-linux-x64": {
- "version": "0.34.1",
- "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.1.tgz",
- "integrity": "sha512-wExv7SH9nmoBW3Wr2gvQopX1k8q2g5V5Iag8Zk6AVENsjwd+3adjwxtp3Dcu2QhOXr8W9NusBU6XcQUohBZ5MA==",
+ "version": "0.34.3",
+ "resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.3.tgz",
+ "integrity": "sha512-8kYso8d806ypnSq3/Ly0QEw90V5ZoHh10yH0HnrzOCr6DKAPI6QVHvwleqMkVQ0m+fc7EH8ah0BB0QPuWY6zJQ==",
"cpu": [
"x64"
],
@@ -1199,13 +1221,13 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
- "@img/sharp-libvips-linux-x64": "1.1.0"
+ "@img/sharp-libvips-linux-x64": "1.2.0"
}
},
"node_modules/@img/sharp-linuxmusl-arm64": {
- "version": "0.34.1",
- "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.1.tgz",
- "integrity": "sha512-DfvyxzHxw4WGdPiTF0SOHnm11Xv4aQexvqhRDAoD00MzHekAj9a/jADXeXYCDFH/DzYruwHbXU7uz+H+nWmSOQ==",
+ "version": "0.34.3",
+ "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.3.tgz",
+ "integrity": "sha512-vAjbHDlr4izEiXM1OTggpCcPg9tn4YriK5vAjowJsHwdBIdx0fYRsURkxLG2RLm9gyBq66gwtWI8Gx0/ov+JKQ==",
"cpu": [
"arm64"
],
@@ -1221,13 +1243,13 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
- "@img/sharp-libvips-linuxmusl-arm64": "1.1.0"
+ "@img/sharp-libvips-linuxmusl-arm64": "1.2.0"
}
},
"node_modules/@img/sharp-linuxmusl-x64": {
- "version": "0.34.1",
- "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.1.tgz",
- "integrity": "sha512-pax/kTR407vNb9qaSIiWVnQplPcGU8LRIJpDT5o8PdAx5aAA7AS3X9PS8Isw1/WfqgQorPotjrZL3Pqh6C5EBg==",
+ "version": "0.34.3",
+ "resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.3.tgz",
+ "integrity": "sha512-gCWUn9547K5bwvOn9l5XGAEjVTTRji4aPTqLzGXHvIr6bIDZKNTA34seMPgM0WmSf+RYBH411VavCejp3PkOeQ==",
"cpu": [
"x64"
],
@@ -1243,20 +1265,20 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
- "@img/sharp-libvips-linuxmusl-x64": "1.1.0"
+ "@img/sharp-libvips-linuxmusl-x64": "1.2.0"
}
},
"node_modules/@img/sharp-wasm32": {
- "version": "0.34.1",
- "resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.1.tgz",
- "integrity": "sha512-YDybQnYrLQfEpzGOQe7OKcyLUCML4YOXl428gOOzBgN6Gw0rv8dpsJ7PqTHxBnXnwXr8S1mYFSLSa727tpz0xg==",
+ "version": "0.34.3",
+ "resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.3.tgz",
+ "integrity": "sha512-+CyRcpagHMGteySaWos8IbnXcHgfDn7pO2fiC2slJxvNq9gDipYBN42/RagzctVRKgxATmfqOSulgZv5e1RdMg==",
"cpu": [
"wasm32"
],
"license": "Apache-2.0 AND LGPL-3.0-or-later AND MIT",
"optional": true,
"dependencies": {
- "@emnapi/runtime": "^1.4.0"
+ "@emnapi/runtime": "^1.4.4"
},
"engines": {
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
@@ -1265,10 +1287,29 @@
"url": "https://opencollective.com/libvips"
}
},
+ "node_modules/@img/sharp-win32-arm64": {
+ "version": "0.34.3",
+ "resolved": "https://registry.npmjs.org/@img/sharp-win32-arm64/-/sharp-win32-arm64-0.34.3.tgz",
+ "integrity": "sha512-MjnHPnbqMXNC2UgeLJtX4XqoVHHlZNd+nPt1kRPmj63wURegwBhZlApELdtxM2OIZDRv/DFtLcNhVbd1z8GYXQ==",
+ "cpu": [
+ "arm64"
+ ],
+ "license": "Apache-2.0 AND LGPL-3.0-or-later",
+ "optional": true,
+ "os": [
+ "win32"
+ ],
+ "engines": {
+ "node": "^18.17.0 || ^20.3.0 || >=21.0.0"
+ },
+ "funding": {
+ "url": "https://opencollective.com/libvips"
+ }
+ },
"node_modules/@img/sharp-win32-ia32": {
- "version": "0.34.1",
- "resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.1.tgz",
- "integrity": "sha512-WKf/NAZITnonBf3U1LfdjoMgNO5JYRSlhovhRhMxXVdvWYveM4kM3L8m35onYIdh75cOMCo1BexgVQcCDzyoWw==",
+ "version": "0.34.3",
+ "resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.3.tgz",
+ "integrity": "sha512-xuCdhH44WxuXgOM714hn4amodJMZl3OEvf0GVTm0BEyMeA2to+8HEdRPShH0SLYptJY1uBw+SCFP9WVQi1Q/cw==",
"cpu": [
"ia32"
],
@@ -1285,9 +1326,9 @@
}
},
"node_modules/@img/sharp-win32-x64": {
- "version": "0.34.1",
- "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.1.tgz",
- "integrity": "sha512-hw1iIAHpNE8q3uMIRCgGOeDoz9KtFNarFLQclLxr/LK1VBkj8nby18RjFvr6aP7USRYAjTZW6yisnBWMX571Tw==",
+ "version": "0.34.3",
+ "resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.3.tgz",
+ "integrity": "sha512-OWwz05d++TxzLEv4VnsTz5CmZ6mI6S05sfQGEMrNrQcOEERbX46332IvE7pO/EUiw7jUrrS40z/M7kPyjfl04g==",
"cpu": [
"x64"
],
@@ -1849,9 +1890,10 @@
}
},
"node_modules/@next/env": {
- "version": "15.3.3",
- "resolved": "https://registry.npmjs.org/@next/env/-/env-15.3.3.tgz",
- "integrity": "sha512-OdiMrzCl2Xi0VTjiQQUK0Xh7bJHnOuET2s+3V+Y40WJBAXrJeGA3f+I8MZJ/YQ3mVGi5XGR1L66oFlgqXhQ4Vw=="
+ "version": "15.5.3",
+ "resolved": "https://registry.npmjs.org/@next/env/-/env-15.5.3.tgz",
+ "integrity": "sha512-RSEDTRqyihYXygx/OJXwvVupfr9m04+0vH8vyy0HfZ7keRto6VX9BbEk0J2PUk0VGy6YhklJUSrgForov5F9pw==",
+ "license": "MIT"
},
"node_modules/@next/eslint-plugin-next": {
"version": "15.5.2",
@@ -1864,12 +1906,13 @@
}
},
"node_modules/@next/swc-darwin-arm64": {
- "version": "15.3.3",
- "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-15.3.3.tgz",
- "integrity": "sha512-WRJERLuH+O3oYB4yZNVahSVFmtxRNjNF1I1c34tYMoJb0Pve+7/RaLAJJizyYiFhjYNGHRAE1Ri2Fd23zgDqhg==",
+ "version": "15.5.3",
+ "resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-15.5.3.tgz",
+ "integrity": "sha512-nzbHQo69+au9wJkGKTU9lP7PXv0d1J5ljFpvb+LnEomLtSbJkbZyEs6sbF3plQmiOB2l9OBtN2tNSvCH1nQ9Jg==",
"cpu": [
"arm64"
],
+ "license": "MIT",
"optional": true,
"os": [
"darwin"
@@ -1879,12 +1922,13 @@
}
},
"node_modules/@next/swc-darwin-x64": {
- "version": "15.3.3",
- "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-15.3.3.tgz",
- "integrity": "sha512-XHdzH/yBc55lu78k/XwtuFR/ZXUTcflpRXcsu0nKmF45U96jt1tsOZhVrn5YH+paw66zOANpOnFQ9i6/j+UYvw==",
+ "version": "15.5.3",
+ "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-15.5.3.tgz",
+ "integrity": "sha512-w83w4SkOOhekJOcA5HBvHyGzgV1W/XvOfpkrxIse4uPWhYTTRwtGEM4v/jiXwNSJvfRvah0H8/uTLBKRXlef8g==",
"cpu": [
"x64"
],
+ "license": "MIT",
"optional": true,
"os": [
"darwin"
@@ -1894,12 +1938,13 @@
}
},
"node_modules/@next/swc-linux-arm64-gnu": {
- "version": "15.3.3",
- "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-15.3.3.tgz",
- "integrity": "sha512-VZ3sYL2LXB8znNGcjhocikEkag/8xiLgnvQts41tq6i+wql63SMS1Q6N8RVXHw5pEUjiof+II3HkDd7GFcgkzw==",
+ "version": "15.5.3",
+ "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-15.5.3.tgz",
+ "integrity": "sha512-+m7pfIs0/yvgVu26ieaKrifV8C8yiLe7jVp9SpcIzg7XmyyNE7toC1fy5IOQozmr6kWl/JONC51osih2RyoXRw==",
"cpu": [
"arm64"
],
+ "license": "MIT",
"optional": true,
"os": [
"linux"
@@ -1909,12 +1954,13 @@
}
},
"node_modules/@next/swc-linux-arm64-musl": {
- "version": "15.3.3",
- "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-15.3.3.tgz",
- "integrity": "sha512-h6Y1fLU4RWAp1HPNJWDYBQ+e3G7sLckyBXhmH9ajn8l/RSMnhbuPBV/fXmy3muMcVwoJdHL+UtzRzs0nXOf9SA==",
+ "version": "15.5.3",
+ "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-15.5.3.tgz",
+ "integrity": "sha512-u3PEIzuguSenoZviZJahNLgCexGFhso5mxWCrrIMdvpZn6lkME5vc/ADZG8UUk5K1uWRy4hqSFECrON6UKQBbQ==",
"cpu": [
"arm64"
],
+ "license": "MIT",
"optional": true,
"os": [
"linux"
@@ -1924,12 +1970,13 @@
}
},
"node_modules/@next/swc-linux-x64-gnu": {
- "version": "15.3.3",
- "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-15.3.3.tgz",
- "integrity": "sha512-jJ8HRiF3N8Zw6hGlytCj5BiHyG/K+fnTKVDEKvUCyiQ/0r5tgwO7OgaRiOjjRoIx2vwLR+Rz8hQoPrnmFbJdfw==",
+ "version": "15.5.3",
+ "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-15.5.3.tgz",
+ "integrity": "sha512-lDtOOScYDZxI2BENN9m0pfVPJDSuUkAD1YXSvlJF0DKwZt0WlA7T7o3wrcEr4Q+iHYGzEaVuZcsIbCps4K27sA==",
"cpu": [
"x64"
],
+ "license": "MIT",
"optional": true,
"os": [
"linux"
@@ -1939,12 +1986,13 @@
}
},
"node_modules/@next/swc-linux-x64-musl": {
- "version": "15.3.3",
- "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-15.3.3.tgz",
- "integrity": "sha512-HrUcTr4N+RgiiGn3jjeT6Oo208UT/7BuTr7K0mdKRBtTbT4v9zJqCDKO97DUqqoBK1qyzP1RwvrWTvU6EPh/Cw==",
+ "version": "15.5.3",
+ "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-15.5.3.tgz",
+ "integrity": "sha512-9vWVUnsx9PrY2NwdVRJ4dUURAQ8Su0sLRPqcCCxtX5zIQUBES12eRVHq6b70bbfaVaxIDGJN2afHui0eDm+cLg==",
"cpu": [
"x64"
],
+ "license": "MIT",
"optional": true,
"os": [
"linux"
@@ -1954,12 +2002,13 @@
}
},
"node_modules/@next/swc-win32-arm64-msvc": {
- "version": "15.3.3",
- "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-15.3.3.tgz",
- "integrity": "sha512-SxorONgi6K7ZUysMtRF3mIeHC5aA3IQLmKFQzU0OuhuUYwpOBc1ypaLJLP5Bf3M9k53KUUUj4vTPwzGvl/NwlQ==",
+ "version": "15.5.3",
+ "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-15.5.3.tgz",
+ "integrity": "sha512-1CU20FZzY9LFQigRi6jM45oJMU3KziA5/sSG+dXeVaTm661snQP6xu3ykGxxwU5sLG3sh14teO/IOEPVsQMRfA==",
"cpu": [
"arm64"
],
+ "license": "MIT",
"optional": true,
"os": [
"win32"
@@ -1969,12 +2018,13 @@
}
},
"node_modules/@next/swc-win32-x64-msvc": {
- "version": "15.3.3",
- "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-15.3.3.tgz",
- "integrity": "sha512-4QZG6F8enl9/S2+yIiOiju0iCTFd93d8VC1q9LZS4p/Xuk81W2QDjCFeoogmrWWkAD59z8ZxepBQap2dKS5ruw==",
+ "version": "15.5.3",
+ "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-15.5.3.tgz",
+ "integrity": "sha512-JMoLAq3n3y5tKXPQwCK5c+6tmwkuFDa2XAxz8Wm4+IVthdBZdZGh+lmiLUHg9f9IDwIQpUjp+ysd6OkYTyZRZw==",
"cpu": [
"x64"
],
+ "license": "MIT",
"optional": true,
"os": [
"win32"
@@ -2874,22 +2924,22 @@
}
},
"node_modules/@radix-ui/react-select": {
- "version": "2.2.5",
- "resolved": "https://registry.npmjs.org/@radix-ui/react-select/-/react-select-2.2.5.tgz",
- "integrity": "sha512-HnMTdXEVuuyzx63ME0ut4+sEMYW6oouHWNGUZc7ddvUWIcfCva/AMoqEW/3wnEllriMWBa0RHspCYnfCWJQYmA==",
+ "version": "2.2.6",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-select/-/react-select-2.2.6.tgz",
+ "integrity": "sha512-I30RydO+bnn2PQztvo25tswPH+wFBjehVGtmagkU78yMdwTwVf12wnAOF+AeP8S2N8xD+5UPbGhkUfPyvT+mwQ==",
"license": "MIT",
"dependencies": {
"@radix-ui/number": "1.1.1",
- "@radix-ui/primitive": "1.1.2",
+ "@radix-ui/primitive": "1.1.3",
"@radix-ui/react-collection": "1.1.7",
"@radix-ui/react-compose-refs": "1.1.2",
"@radix-ui/react-context": "1.1.2",
"@radix-ui/react-direction": "1.1.1",
- "@radix-ui/react-dismissable-layer": "1.1.10",
- "@radix-ui/react-focus-guards": "1.1.2",
+ "@radix-ui/react-dismissable-layer": "1.1.11",
+ "@radix-ui/react-focus-guards": "1.1.3",
"@radix-ui/react-focus-scope": "1.1.7",
"@radix-ui/react-id": "1.1.1",
- "@radix-ui/react-popper": "1.2.7",
+ "@radix-ui/react-popper": "1.2.8",
"@radix-ui/react-portal": "1.1.9",
"@radix-ui/react-primitive": "2.1.3",
"@radix-ui/react-slot": "1.2.3",
@@ -2916,13 +2966,19 @@
}
}
},
+ "node_modules/@radix-ui/react-select/node_modules/@radix-ui/primitive": {
+ "version": "1.1.3",
+ "resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz",
+ "integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
+ "license": "MIT"
+ },
"node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-dismissable-layer": {
- "version": "1.1.10",
- "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.10.tgz",
- "integrity": "sha512-IM1zzRV4W3HtVgftdQiiOmA0AdJlCtMLe00FXaHwgt3rAnNsIyDqshvkIW3hj/iu5hu8ERP7KIYki6NkqDxAwQ==",
+ "version": "1.1.11",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz",
+ "integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==",
"license": "MIT",
"dependencies": {
- "@radix-ui/primitive": "1.1.2",
+ "@radix-ui/primitive": "1.1.3",
"@radix-ui/react-compose-refs": "1.1.2",
"@radix-ui/react-primitive": "2.1.3",
"@radix-ui/react-use-callback-ref": "1.1.1",
@@ -2943,6 +2999,21 @@
}
}
},
+ "node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-focus-guards": {
+ "version": "1.1.3",
+ "resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz",
+ "integrity": "sha512-0rFg/Rj2Q62NCm62jZw0QX7a3sz6QCQU0LpZdNrJX8byRGaGVTqbrW9jAoIAHyMQqsNpeZ81YgSizOt5WXq0Pw==",
+ "license": "MIT",
+ "peerDependencies": {
+ "@types/react": "*",
+ "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
+ },
+ "peerDependenciesMeta": {
+ "@types/react": {
+ "optional": true
+ }
+ }
+ },
"node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-focus-scope": {
"version": "1.1.7",
"resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz",
@@ -2968,38 +3039,6 @@
}
}
},
- "node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-popper": {
- "version": "1.2.7",
- "resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.7.tgz",
- "integrity": "sha512-IUFAccz1JyKcf/RjB552PlWwxjeCJB8/4KxT7EhBHOJM+mN7LdW+B3kacJXILm32xawcMMjb2i0cIZpo+f9kiQ==",
- "license": "MIT",
- "dependencies": {
- "@floating-ui/react-dom": "^2.0.0",
- "@radix-ui/react-arrow": "1.1.7",
- "@radix-ui/react-compose-refs": "1.1.2",
- "@radix-ui/react-context": "1.1.2",
- "@radix-ui/react-primitive": "2.1.3",
- "@radix-ui/react-use-callback-ref": "1.1.1",
- "@radix-ui/react-use-layout-effect": "1.1.1",
- "@radix-ui/react-use-rect": "1.1.1",
- "@radix-ui/react-use-size": "1.1.1",
- "@radix-ui/rect": "1.1.1"
- },
- "peerDependencies": {
- "@types/react": "*",
- "@types/react-dom": "*",
- "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
- "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
- },
- "peerDependenciesMeta": {
- "@types/react": {
- "optional": true
- },
- "@types/react-dom": {
- "optional": true
- }
- }
- },
"node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-portal": {
"version": "1.1.9",
"resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
@@ -3547,12 +3586,6 @@
"@sinonjs/commons": "^3.0.0"
}
},
- "node_modules/@swc/counter": {
- "version": "0.1.3",
- "resolved": "https://registry.npmjs.org/@swc/counter/-/counter-0.1.3.tgz",
- "integrity": "sha512-e2BR4lsJkkRlKZ/qCHPw9ZaSxc0MVUd7gtbtaB7aMvHeJVYe8sOB8DBZkP2DtISHGSku9sCK6T6cnY0CtXrOCQ==",
- "license": "Apache-2.0"
- },
"node_modules/@swc/helpers": {
"version": "0.5.15",
"resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.15.tgz",
@@ -3578,6 +3611,13 @@
"tailwindcss": "4.1.6"
}
},
+ "node_modules/@tailwindcss/node/node_modules/tailwindcss": {
+ "version": "4.1.6",
+ "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz",
+ "integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==",
+ "dev": true,
+ "license": "MIT"
+ },
"node_modules/@tailwindcss/oxide": {
"version": "4.1.6",
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.6.tgz",
@@ -3838,6 +3878,13 @@
"tailwindcss": "4.1.6"
}
},
+ "node_modules/@tailwindcss/postcss/node_modules/tailwindcss": {
+ "version": "4.1.6",
+ "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz",
+ "integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==",
+ "dev": true,
+ "license": "MIT"
+ },
"node_modules/@testing-library/dom": {
"version": "10.4.1",
"resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz",
@@ -5461,17 +5508,6 @@
"dev": true,
"license": "MIT"
},
- "node_modules/busboy": {
- "version": "1.6.0",
- "resolved": "https://registry.npmjs.org/busboy/-/busboy-1.6.0.tgz",
- "integrity": "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==",
- "dependencies": {
- "streamsearch": "^1.1.0"
- },
- "engines": {
- "node": ">=10.16.0"
- }
- },
"node_modules/bytes": {
"version": "3.1.2",
"resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz",
@@ -8281,9 +8317,9 @@
}
},
"node_modules/is-arrayish": {
- "version": "0.3.2",
- "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz",
- "integrity": "sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ==",
+ "version": "0.3.4",
+ "resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.4.tgz",
+ "integrity": "sha512-m6UrgzFVUYawGBh1dUsWR5M2Clqic9RVXC/9f8ceNlv2IcO9j9J/z8UoCLPqtsPBFNzEpfR3xftohbfqDx8EQA==",
"license": "MIT",
"optional": true
},
@@ -11528,14 +11564,13 @@
}
},
"node_modules/next": {
- "version": "15.3.3",
- "resolved": "https://registry.npmjs.org/next/-/next-15.3.3.tgz",
- "integrity": "sha512-JqNj29hHNmCLtNvd090SyRbXJiivQ+58XjCcrC50Crb5g5u2zi7Y2YivbsEfzk6AtVI80akdOQbaMZwWB1Hthw==",
+ "version": "15.5.3",
+ "resolved": "https://registry.npmjs.org/next/-/next-15.5.3.tgz",
+ "integrity": "sha512-r/liNAx16SQj4D+XH/oI1dlpv9tdKJ6cONYPwwcCC46f2NjpaRWY+EKCzULfgQYV6YKXjHBchff2IZBSlZmJNw==",
+ "license": "MIT",
"dependencies": {
- "@next/env": "15.3.3",
- "@swc/counter": "0.1.3",
+ "@next/env": "15.5.3",
"@swc/helpers": "0.5.15",
- "busboy": "1.6.0",
"caniuse-lite": "^1.0.30001579",
"postcss": "8.4.31",
"styled-jsx": "5.1.6"
@@ -11547,19 +11582,19 @@
"node": "^18.18.0 || ^19.8.0 || >= 20.0.0"
},
"optionalDependencies": {
- "@next/swc-darwin-arm64": "15.3.3",
- "@next/swc-darwin-x64": "15.3.3",
- "@next/swc-linux-arm64-gnu": "15.3.3",
- "@next/swc-linux-arm64-musl": "15.3.3",
- "@next/swc-linux-x64-gnu": "15.3.3",
- "@next/swc-linux-x64-musl": "15.3.3",
- "@next/swc-win32-arm64-msvc": "15.3.3",
- "@next/swc-win32-x64-msvc": "15.3.3",
- "sharp": "^0.34.1"
+ "@next/swc-darwin-arm64": "15.5.3",
+ "@next/swc-darwin-x64": "15.5.3",
+ "@next/swc-linux-arm64-gnu": "15.5.3",
+ "@next/swc-linux-arm64-musl": "15.5.3",
+ "@next/swc-linux-x64-gnu": "15.5.3",
+ "@next/swc-linux-x64-musl": "15.5.3",
+ "@next/swc-win32-arm64-msvc": "15.5.3",
+ "@next/swc-win32-x64-msvc": "15.5.3",
+ "sharp": "^0.34.3"
},
"peerDependencies": {
"@opentelemetry/api": "^1.1.0",
- "@playwright/test": "^1.41.2",
+ "@playwright/test": "^1.51.1",
"babel-plugin-react-compiler": "*",
"react": "^18.2.0 || 19.0.0-rc-de68d2f4-20241204 || ^19.0.0",
"react-dom": "^18.2.0 || 19.0.0-rc-de68d2f4-20241204 || ^19.0.0",
@@ -13226,16 +13261,16 @@
"license": "ISC"
},
"node_modules/sharp": {
- "version": "0.34.1",
- "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.1.tgz",
- "integrity": "sha512-1j0w61+eVxu7DawFJtnfYcvSv6qPFvfTaqzTQ2BLknVhHTwGS8sc63ZBF4rzkWMBVKybo4S5OBtDdZahh2A1xg==",
+ "version": "0.34.3",
+ "resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.3.tgz",
+ "integrity": "sha512-eX2IQ6nFohW4DbvHIOLRB3MHFpYqaqvXd3Tp5e/T/dSH83fxaNJQRvDMhASmkNTsNTVF2/OOopzRCt7xokgPfg==",
"hasInstallScript": true,
"license": "Apache-2.0",
"optional": true,
"dependencies": {
"color": "^4.2.3",
- "detect-libc": "^2.0.3",
- "semver": "^7.7.1"
+ "detect-libc": "^2.0.4",
+ "semver": "^7.7.2"
},
"engines": {
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
@@ -13244,26 +13279,28 @@
"url": "https://opencollective.com/libvips"
},
"optionalDependencies": {
- "@img/sharp-darwin-arm64": "0.34.1",
- "@img/sharp-darwin-x64": "0.34.1",
- "@img/sharp-libvips-darwin-arm64": "1.1.0",
- "@img/sharp-libvips-darwin-x64": "1.1.0",
- "@img/sharp-libvips-linux-arm": "1.1.0",
- "@img/sharp-libvips-linux-arm64": "1.1.0",
- "@img/sharp-libvips-linux-ppc64": "1.1.0",
- "@img/sharp-libvips-linux-s390x": "1.1.0",
- "@img/sharp-libvips-linux-x64": "1.1.0",
- "@img/sharp-libvips-linuxmusl-arm64": "1.1.0",
- "@img/sharp-libvips-linuxmusl-x64": "1.1.0",
- "@img/sharp-linux-arm": "0.34.1",
- "@img/sharp-linux-arm64": "0.34.1",
- "@img/sharp-linux-s390x": "0.34.1",
- "@img/sharp-linux-x64": "0.34.1",
- "@img/sharp-linuxmusl-arm64": "0.34.1",
- "@img/sharp-linuxmusl-x64": "0.34.1",
- "@img/sharp-wasm32": "0.34.1",
- "@img/sharp-win32-ia32": "0.34.1",
- "@img/sharp-win32-x64": "0.34.1"
+ "@img/sharp-darwin-arm64": "0.34.3",
+ "@img/sharp-darwin-x64": "0.34.3",
+ "@img/sharp-libvips-darwin-arm64": "1.2.0",
+ "@img/sharp-libvips-darwin-x64": "1.2.0",
+ "@img/sharp-libvips-linux-arm": "1.2.0",
+ "@img/sharp-libvips-linux-arm64": "1.2.0",
+ "@img/sharp-libvips-linux-ppc64": "1.2.0",
+ "@img/sharp-libvips-linux-s390x": "1.2.0",
+ "@img/sharp-libvips-linux-x64": "1.2.0",
+ "@img/sharp-libvips-linuxmusl-arm64": "1.2.0",
+ "@img/sharp-libvips-linuxmusl-x64": "1.2.0",
+ "@img/sharp-linux-arm": "0.34.3",
+ "@img/sharp-linux-arm64": "0.34.3",
+ "@img/sharp-linux-ppc64": "0.34.3",
+ "@img/sharp-linux-s390x": "0.34.3",
+ "@img/sharp-linux-x64": "0.34.3",
+ "@img/sharp-linuxmusl-arm64": "0.34.3",
+ "@img/sharp-linuxmusl-x64": "0.34.3",
+ "@img/sharp-wasm32": "0.34.3",
+ "@img/sharp-win32-arm64": "0.34.3",
+ "@img/sharp-win32-ia32": "0.34.3",
+ "@img/sharp-win32-x64": "0.34.3"
}
},
"node_modules/shebang-command": {
@@ -13389,9 +13426,9 @@
"license": "ISC"
},
"node_modules/simple-swizzle": {
- "version": "0.2.2",
- "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz",
- "integrity": "sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg==",
+ "version": "0.2.4",
+ "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.4.tgz",
+ "integrity": "sha512-nAu1WFPQSMNr2Zn9PGSZK9AGn4t/y97lEm+MXTtUDwfP0ksAIX4nO+6ruD9Jwut4C49SB1Ws+fbXsm/yScWOHw==",
"license": "MIT",
"optional": true,
"dependencies": {
@@ -13512,14 +13549,6 @@
"node": ">= 0.8"
}
},
- "node_modules/streamsearch": {
- "version": "1.1.0",
- "resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-1.1.0.tgz",
- "integrity": "sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==",
- "engines": {
- "node": ">=10.0.0"
- }
- },
"node_modules/string-length": {
"version": "4.0.2",
"resolved": "https://registry.npmjs.org/string-length/-/string-length-4.0.2.tgz",
@@ -13843,9 +13872,9 @@
}
},
"node_modules/tailwindcss": {
- "version": "4.1.6",
- "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz",
- "integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==",
+ "version": "4.1.13",
+ "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.13.tgz",
+ "integrity": "sha512-i+zidfmTqtwquj4hMEwdjshYYgMbOrPzb9a0M3ZgNa0JMoZeFC6bxZvO8yr8ozS6ix2SDz0+mvryPeBs2TFE+w==",
"dev": true,
"license": "MIT"
},
diff --git a/llama_stack/ui/package.json b/llama_stack/ui/package.json
index e50401fa6..ccbc2a4c2 100644
--- a/llama_stack/ui/package.json
+++ b/llama_stack/ui/package.json
@@ -16,7 +16,7 @@
"@radix-ui/react-collapsible": "^1.1.12",
"@radix-ui/react-dialog": "^1.1.13",
"@radix-ui/react-dropdown-menu": "^2.1.16",
- "@radix-ui/react-select": "^2.2.5",
+ "@radix-ui/react-select": "^2.2.6",
"@radix-ui/react-separator": "^1.1.7",
"@radix-ui/react-slot": "^1.2.3",
"@radix-ui/react-tooltip": "^1.2.8",
@@ -25,7 +25,7 @@
"framer-motion": "^12.23.12",
"llama-stack-client": "^0.2.21",
"lucide-react": "^0.542.0",
- "next": "15.3.3",
+ "next": "15.5.3",
"next-auth": "^4.24.11",
"next-themes": "^0.4.6",
"react": "^19.0.0",
diff --git a/pyproject.toml b/pyproject.toml
index 0414aafb0..ce95b758f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -32,7 +32,7 @@ dependencies = [
"jinja2>=3.1.6",
"jsonschema",
"llama-stack-client>=0.2.21",
- "openai>=1.99.6",
+ "openai>=1.100.0", # for expires_after support
"prompt-toolkit",
"python-dotenv",
"python-jose[cryptography]",
@@ -80,7 +80,6 @@ dev = [
unit = [
"sqlite-vec",
"ollama",
- "openai",
"aiosqlite",
"aiohttp",
"psycopg2-binary>=2.9.0",
@@ -105,7 +104,6 @@ unit = [
# separately. If you are using "uv" to execute your tests, you can use the "--group" flag to specify extra
# dependencies.
test = [
- "openai>=1.100.0", # for expires_after support
"aiosqlite",
"aiohttp",
"torch>=2.6.0",
@@ -356,6 +354,7 @@ warn_required_dynamic_aliases = true
classmethod-decorators = ["classmethod", "pydantic.field_validator"]
[tool.pytest.ini_options]
+addopts = ["--durations=10"]
asyncio_mode = "auto"
markers = [
"allow_network: Allow network access for specific unit tests",
diff --git a/scripts/github/schedule-record-workflow.sh b/scripts/github/schedule-record-workflow.sh
index c292e53e6..44b0947b6 100755
--- a/scripts/github/schedule-record-workflow.sh
+++ b/scripts/github/schedule-record-workflow.sh
@@ -239,8 +239,9 @@ echo "Test pattern: ${TEST_PATTERN:-"(none)"}"
echo ""
# Prepare inputs for gh workflow run
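+# Start from an empty INPUTS so each optional flag below appends uniformly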
+INPUTS=
if [[ -n "$TEST_SUBDIRS" ]]; then
- INPUTS="-f subdirs='$TEST_SUBDIRS'"
+ INPUTS="$INPUTS -f subdirs='$TEST_SUBDIRS'"
fi
if [[ -n "$TEST_SETUP" ]]; then
INPUTS="$INPUTS -f test-setup='$TEST_SETUP'"
diff --git a/tests/integration/inference/test_openai_completion.py b/tests/integration/inference/test_openai_completion.py
index f9c837ebd..22dec8876 100644
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@@ -6,12 +6,25 @@
import time
+import unicodedata
import pytest
from ..test_cases.test_case import TestCase
+def _normalize_text(text: str) -> str:
+ """
+ Normalize Unicode text by removing diacritical marks for comparison.
+
+    The test case streaming_01 expects the answer "Sol" to the question "What's the name of the Sun
+    in latin?", but the model returns "sōl" (with a macron over the 'o'), which is the correct
+    Latin spelling. A simple case-insensitive substring search for "sol" fails because the
+    actual response contains the diacritical mark, so we strip diacritics before comparing.
+ """
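+    # e.g. NFD decomposes "ō" into "o" + U+0304 (combining macron); the ASCII
+    # encode with errors="ignore" then drops the combining mark, leaving "sol"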
+ return unicodedata.normalize("NFD", text).encode("ascii", "ignore").decode("ascii").lower()
+
+
def provider_from_model(client_with_models, model_id):
models = {m.identifier: m for m in client_with_models.models.list()}
models.update({m.provider_resource_id: m for m in client_with_models.models.list()})
@@ -42,6 +55,10 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
"remote::groq",
"remote::gemini", # https://generativelanguage.googleapis.com/v1beta/openai/completions -> 404
"remote::anthropic", # at least claude-3-{5,7}-{haiku,sonnet}-* / claude-{sonnet,opus}-4-* are not supported
+ "remote::azure", # {'error': {'code': 'OperationNotSupported', 'message': 'The completion operation
+ # does not work with the specified model, gpt-5-mini. Please choose different model and try
+ # again. You can learn more about which models can be used with each operation here:
+        # https://go.microsoft.com/fwlink/?linkid=2197993.'}}
):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")
@@ -157,7 +174,8 @@ def test_openai_completion_non_streaming_suffix(llama_stack_client, client_with_
assert len(response.choices) > 0
choice = response.choices[0]
assert len(choice.text) > 5
- assert "france" in choice.text.lower()
+ normalized_text = _normalize_text(choice.text)
+ assert "france" in normalized_text
@pytest.mark.parametrize(
@@ -248,7 +266,9 @@ def test_openai_chat_completion_non_streaming(compat_client, client_with_models,
)
message_content = response.choices[0].message.content.lower().strip()
assert len(message_content) > 0
- assert expected.lower() in message_content
+ normalized_expected = _normalize_text(expected)
+ normalized_content = _normalize_text(message_content)
+ assert normalized_expected in normalized_content
@pytest.mark.parametrize(
@@ -272,10 +292,13 @@ def test_openai_chat_completion_streaming(compat_client, client_with_models, tex
)
streamed_content = []
for chunk in response:
- if chunk.choices[0].delta.content:
+        # Some providers (e.g. Azure) send an empty choices list in the first chunk, so guard against that
+ if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta.content:
streamed_content.append(chunk.choices[0].delta.content.lower().strip())
assert len(streamed_content) > 0
- assert expected.lower() in "".join(streamed_content)
+ normalized_expected = _normalize_text(expected)
+ normalized_content = _normalize_text("".join(streamed_content))
+ assert normalized_expected in normalized_content
@pytest.mark.parametrize(
@@ -308,8 +331,12 @@ def test_openai_chat_completion_streaming_with_n(compat_client, client_with_mode
streamed_content.get(choice.index, "") + choice.delta.content.lower().strip()
)
assert len(streamed_content) == 2
+ normalized_expected = _normalize_text(expected)
for i, content in streamed_content.items():
- assert expected.lower() in content, f"Choice {i}: Expected {expected.lower()} in {content}"
+ normalized_content = _normalize_text(content)
+ assert normalized_expected in normalized_content, (
+ f"Choice {i}: Expected {normalized_expected} in {normalized_content}"
+ )
@pytest.mark.parametrize(
@@ -339,9 +366,9 @@ def test_inference_store(compat_client, client_with_models, text_model_id, strea
content = ""
response_id = None
for chunk in response:
- if response_id is None:
+ if response_id is None and chunk.id:
response_id = chunk.id
- if chunk.choices[0].delta.content:
+ if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta.content:
content += chunk.choices[0].delta.content
else:
response_id = response.id
@@ -410,11 +437,12 @@ def test_inference_store_tool_calls(compat_client, client_with_models, text_mode
content = ""
response_id = None
for chunk in response:
- if response_id is None:
+ if response_id is None and chunk.id:
response_id = chunk.id
- if delta := chunk.choices[0].delta:
- if delta.content:
- content += delta.content
+ if chunk.choices and len(chunk.choices) > 0:
+ if delta := chunk.choices[0].delta:
+ if delta.content:
+ content += delta.content
else:
response_id = response.id
content = response.choices[0].message.content
@@ -484,4 +512,5 @@ def test_openai_chat_completion_non_streaming_with_file(openai_client, client_wi
stream=False,
)
message_content = response.choices[0].message.content.lower().strip()
- assert "hello world" in message_content
+ normalized_content = _normalize_text(message_content)
+ assert "hello world" in normalized_content
diff --git a/tests/integration/inference/test_text_inference.py b/tests/integration/inference/test_text_inference.py
index d7ffe5929..621084231 100644
--- a/tests/integration/inference/test_text_inference.py
+++ b/tests/integration/inference/test_text_inference.py
@@ -32,6 +32,7 @@ def skip_if_model_doesnt_support_completion(client_with_models, model_id):
"remote::vertexai",
"remote::groq",
"remote::sambanova",
+ "remote::azure",
)
or "openai-compat" in provider.provider_type
):
@@ -44,7 +45,7 @@ def skip_if_model_doesnt_support_json_schema_structured_output(client_with_model
provider_id = models[model_id].provider_id
providers = {p.provider_id: p for p in client_with_models.providers.list()}
provider = providers[provider_id]
- if provider.provider_type in ("remote::sambanova",):
+ if provider.provider_type in ("remote::sambanova", "remote::azure"):
pytest.skip(
f"Model {model_id} hosted by {provider.provider_type} doesn't support json_schema structured output"
)
diff --git a/tests/integration/recordings/responses/0fda25b9241c.json b/tests/integration/recordings/responses/0fda25b9241c.json
new file mode 100644
index 000000000..b97ee1670
--- /dev/null
+++ b/tests/integration/recordings/responses/0fda25b9241c.json
@@ -0,0 +1,71 @@
+{
+ "request": {
+ "method": "POST",
+ "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
+ "headers": {},
+ "body": {
+ "model": "gpt-5-mini",
+ "messages": [
+ {
+ "role": "user",
+ "content": "Which planet do humans live on?"
+ }
+ ],
+ "stream": false
+ },
+ "endpoint": "/v1/chat/completions",
+ "model": "gpt-5-mini"
+ },
+ "response": {
+ "body": {
+ "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+ "__data__": {
+ "id": "chatcmpl-CECIXqfvjuluKkZtG3q2QJoSQhBU0",
+ "choices": [
+ {
+ "finish_reason": "stop",
+ "index": 0,
+ "logprobs": null,
+ "message": {
+ "content": "Humans live on Earth \u2014 the third planet from the Sun. It's the only known planet that naturally supports life, with a breathable atmosphere, liquid water, and temperatures suitable for living organisms.",
+ "refusal": null,
+ "role": "assistant",
+ "annotations": [],
+ "audio": null,
+ "function_call": null,
+ "tool_calls": null
+ },
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499901,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": {
+ "completion_tokens": 112,
+ "prompt_tokens": 13,
+ "total_tokens": 125,
+ "completion_tokens_details": {
+ "accepted_prediction_tokens": 0,
+ "audio_tokens": 0,
+ "reasoning_tokens": 64,
+ "rejected_prediction_tokens": 0
+ },
+ "prompt_tokens_details": {
+ "audio_tokens": 0,
+ "cached_tokens": 0
+ }
+ },
+ "prompt_filter_results": [
+ {
+ "prompt_index": 0,
+ "content_filter_results": {}
+ }
+ ]
+ }
+ },
+ "is_streaming": false
+ }
+}
diff --git a/tests/integration/recordings/responses/2b2ad549510d.json b/tests/integration/recordings/responses/2b2ad549510d.json
new file mode 100644
index 000000000..55a9d6426
--- /dev/null
+++ b/tests/integration/recordings/responses/2b2ad549510d.json
@@ -0,0 +1,448 @@
+{
+ "request": {
+ "method": "POST",
+ "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
+ "headers": {},
+ "body": {
+ "model": "gpt-5-mini",
+ "messages": [
+ {
+ "role": "user",
+ "content": "Hello, world!"
+ }
+ ],
+ "stream": true
+ },
+ "endpoint": "/v1/chat/completions",
+ "model": "gpt-5-mini"
+ },
+ "response": {
+ "body": [
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "",
+ "choices": [],
+ "created": 0,
+ "model": "",
+ "object": "",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null,
+ "prompt_filter_results": [
+ {
+ "prompt_index": 0,
+ "content_filter_results": {}
+ }
+ ]
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+ "choices": [
+ {
+ "delta": {
+ "content": "",
+ "function_call": null,
+ "refusal": null,
+ "role": "assistant",
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499910,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+ "choices": [
+ {
+ "delta": {
+ "content": "Hello",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499910,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+ "choices": [
+ {
+ "delta": {
+ "content": ",",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499910,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+ "choices": [
+ {
+ "delta": {
+ "content": " world",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499910,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+ "choices": [
+ {
+ "delta": {
+ "content": "!",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499910,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+ "choices": [
+ {
+ "delta": {
+ "content": " Hi",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499910,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+ "choices": [
+ {
+ "delta": {
+ "content": " \u2014",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499910,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+ "choices": [
+ {
+ "delta": {
+ "content": " how",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499910,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+ "choices": [
+ {
+ "delta": {
+ "content": " can",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499910,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+ "choices": [
+ {
+ "delta": {
+ "content": " I",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499910,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+ "choices": [
+ {
+ "delta": {
+ "content": " help",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499910,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+ "choices": [
+ {
+ "delta": {
+ "content": " you",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499910,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+ "choices": [
+ {
+ "delta": {
+ "content": " today",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499910,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+ "choices": [
+ {
+ "delta": {
+ "content": "?",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499910,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
+ "choices": [
+ {
+ "delta": {
+ "content": null,
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": "stop",
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499910,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ }
+ ],
+ "is_streaming": true
+ }
+}
diff --git a/tests/integration/recordings/responses/57b67d1b1a36.json b/tests/integration/recordings/responses/57b67d1b1a36.json
new file mode 100644
index 000000000..14de1d85e
--- /dev/null
+++ b/tests/integration/recordings/responses/57b67d1b1a36.json
@@ -0,0 +1,71 @@
+{
+ "request": {
+ "method": "POST",
+ "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
+ "headers": {},
+ "body": {
+ "model": "gpt-5-mini",
+ "messages": [
+ {
+ "role": "user",
+ "content": "Which planet has rings around it with a name starting with letter S?"
+ }
+ ],
+ "stream": false
+ },
+ "endpoint": "/v1/chat/completions",
+ "model": "gpt-5-mini"
+ },
+ "response": {
+ "body": {
+ "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+ "__data__": {
+ "id": "chatcmpl-CECIkT5cbqFazpungtewksVePcUNa",
+ "choices": [
+ {
+ "finish_reason": "stop",
+ "index": 0,
+ "logprobs": null,
+ "message": {
+ "content": "Saturn. It's the planet famous for its prominent ring system made of ice and rock.",
+ "refusal": null,
+ "role": "assistant",
+ "annotations": [],
+ "audio": null,
+ "function_call": null,
+ "tool_calls": null
+ },
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499914,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": {
+ "completion_tokens": 156,
+ "prompt_tokens": 20,
+ "total_tokens": 176,
+ "completion_tokens_details": {
+ "accepted_prediction_tokens": 0,
+ "audio_tokens": 0,
+ "reasoning_tokens": 128,
+ "rejected_prediction_tokens": 0
+ },
+ "prompt_tokens_details": {
+ "audio_tokens": 0,
+ "cached_tokens": 0
+ }
+ },
+ "prompt_filter_results": [
+ {
+ "prompt_index": 0,
+ "content_filter_results": {}
+ }
+ ]
+ }
+ },
+ "is_streaming": false
+ }
+}
diff --git a/tests/integration/recordings/responses/8752115f8d0c.json b/tests/integration/recordings/responses/8752115f8d0c.json
new file mode 100644
index 000000000..0e88bbfa6
--- /dev/null
+++ b/tests/integration/recordings/responses/8752115f8d0c.json
@@ -0,0 +1,71 @@
+{
+ "request": {
+ "method": "POST",
+ "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
+ "headers": {},
+ "body": {
+ "model": "gpt-5-mini",
+ "messages": [
+ {
+ "role": "user",
+ "content": "Hello, world!"
+ }
+ ],
+ "stream": false
+ },
+ "endpoint": "/v1/chat/completions",
+ "model": "gpt-5-mini"
+ },
+ "response": {
+ "body": {
+ "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+ "__data__": {
+ "id": "chatcmpl-CECIuyylsMNXspa83k8LrD8SQadNY",
+ "choices": [
+ {
+ "finish_reason": "stop",
+ "index": 0,
+ "logprobs": null,
+ "message": {
+ "content": "Hello! \ud83d\udc4b How can I help you today \u2014 answer a question, write or edit something, debug code, brainstorm ideas, or anything else?",
+ "refusal": null,
+ "role": "assistant",
+ "annotations": [],
+ "audio": null,
+ "function_call": null,
+ "tool_calls": null
+ },
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499924,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": {
+ "completion_tokens": 40,
+ "prompt_tokens": 10,
+ "total_tokens": 50,
+ "completion_tokens_details": {
+ "accepted_prediction_tokens": 0,
+ "audio_tokens": 0,
+ "reasoning_tokens": 0,
+ "rejected_prediction_tokens": 0
+ },
+ "prompt_tokens_details": {
+ "audio_tokens": 0,
+ "cached_tokens": 0
+ }
+ },
+ "prompt_filter_results": [
+ {
+ "prompt_index": 0,
+ "content_filter_results": {}
+ }
+ ]
+ }
+ },
+ "is_streaming": false
+ }
+}
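
The 12-hex-character filenames look content-addressed: presumably some hash over the normalized request decides which recording a replayed call maps to. The exact derivation (field selection, normalization, hash length) is an assumption here; this sketch only illustrates the shape of such a deterministic request-to-filename mapping:

```python
import hashlib
import json


def recording_key(method: str, url: str, body: dict) -> str:
    """Hypothetical content-addressed key for a recorded request.

    The real test suite's derivation may differ; this only shows the
    idea of hashing a canonicalized request into a short filename.
    """
    normalized = json.dumps(
        {"method": method, "url": url, "body": body}, sort_keys=True
    )
    return hashlib.sha256(normalized.encode()).hexdigest()[:12]
```
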
diff --git a/tests/integration/recordings/responses/94d11daee205.json b/tests/integration/recordings/responses/94d11daee205.json
new file mode 100644
index 000000000..b6a6c3d68
--- /dev/null
+++ b/tests/integration/recordings/responses/94d11daee205.json
@@ -0,0 +1,1178 @@
+{
+ "request": {
+ "method": "POST",
+ "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
+ "headers": {},
+ "body": {
+ "model": "gpt-5-mini",
+ "messages": [
+ {
+ "role": "user",
+ "content": "What is the name of the US captial?"
+ }
+ ],
+ "n": 2,
+ "stream": true
+ },
+ "endpoint": "/v1/chat/completions",
+ "model": "gpt-5-mini"
+ },
+ "response": {
+ "body": [
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "",
+ "choices": [],
+ "created": 0,
+ "model": "",
+ "object": "",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null,
+ "prompt_filter_results": [
+ {
+ "prompt_index": 0,
+ "content_filter_results": {}
+ }
+ ]
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": "",
+ "function_call": null,
+ "refusal": null,
+ "role": "assistant",
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": "",
+ "function_call": null,
+ "refusal": null,
+ "role": "assistant",
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": "The",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " capital",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " of",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " the",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " United",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " States",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": "The",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " capital",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " of",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " the",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " United",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " is",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " States",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " Washington",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " is",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": ",",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " Washington",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " D",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": ",",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": ".C",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " D",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": ".",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": ".C",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " (",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": ".",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": "the",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " (",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " District",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": "official",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " of",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": "ly",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " Columbia",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " the",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": ").",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " District",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " of",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": " Columbia",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": ").",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": null,
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": "stop",
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIpbpLN9VO3z9pVAidTRslxRHtL",
+ "choices": [
+ {
+ "delta": {
+ "content": null,
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": "stop",
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499919,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ }
+ ],
+ "is_streaming": true
+ }
+}
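
This recording streams two parallel completions (`"n": 2`), so deltas for the two choices arrive interleaved, each tagged with its `index`, after a leading Azure preamble chunk whose `id` and `choices` are empty and which carries only `prompt_filter_results`. A sketch of reassembling such a stream, written against the chunk shape above (chunks with no choices, like the preamble, fall out naturally; the final stop chunks carry `content: null` and are ignored):

```python
from collections import defaultdict


def reassemble(chunks: list[dict]) -> dict[int, str]:
    """Group streamed deltas by choice index and concatenate content.

    `chunks` is the list under response.body, one __data__ dict each.
    Chunks with empty choices (the Azure prompt-filter preamble) and
    None content (the finish_reason="stop" chunks) contribute nothing.
    """
    texts: dict[int, list[str]] = defaultdict(list)
    for chunk in chunks:
        for choice in chunk["choices"]:
            content = choice["delta"]["content"]
            if content:
                texts[choice["index"]].append(content)
    return {i: "".join(parts) for i, parts in texts.items()}
```

Applied to this recording, index 0 reassembles to "The capital of the United States is Washington, D.C. (the District of Columbia)." and index 1 to "The capital of the United States is Washington, D.C. (officially the District of Columbia)."
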
diff --git a/tests/integration/recordings/responses/9f3d749cc1c8.json b/tests/integration/recordings/responses/9f3d749cc1c8.json
new file mode 100644
index 000000000..9a4539ab0
--- /dev/null
+++ b/tests/integration/recordings/responses/9f3d749cc1c8.json
@@ -0,0 +1,1150 @@
+{
+ "request": {
+ "method": "POST",
+ "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
+ "headers": {},
+ "body": {
+ "model": "gpt-5-mini",
+ "messages": [
+ {
+ "role": "user",
+ "content": "What's the name of the Sun in latin?"
+ }
+ ],
+ "stream": true
+ },
+ "endpoint": "/v1/chat/completions",
+ "model": "gpt-5-mini"
+ },
+ "response": {
+ "body": [
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "",
+ "choices": [],
+ "created": 0,
+ "model": "",
+ "object": "",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null,
+ "prompt_filter_results": [
+ {
+ "prompt_index": 0,
+ "content_filter_results": {}
+ }
+ ]
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": "",
+ "function_call": null,
+ "refusal": null,
+ "role": "assistant",
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": "The",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " Latin",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " name",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " is",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " \"",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": "Sol",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": "\"",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " (",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": "gen",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": "itive",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " \"",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": "S",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": "olis",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": "\").",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " It's",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " used",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " as",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " the",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " proper",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " name",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " of",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " the",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " Sun",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": ";",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " poets",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " also",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " sometimes",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " used",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " Greek",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": "-derived",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " ep",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": "ithe",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": "ts",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " like",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": " \"",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": "Pho",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": "eb",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": "us",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": ".\"",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIZYHVRY3J0EiPODz10HVzL7cIe",
+ "choices": [
+ {
+ "delta": {
+ "content": null,
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": "stop",
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499903,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ }
+ ],
+ "is_streaming": true
+ }
+}
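
Replaying this single-choice stream through the same reassembly logic yields one string per recording; a test built on such a fixture would more plausibly assert on a stable prefix or substring than on the full token-by-token text. A short usage sketch, reusing `reassemble` from the earlier example:

```python
import json

with open("tests/integration/recordings/responses/9f3d749cc1c8.json") as f:
    recording = json.load(f)

chunks = [c["__data__"] for c in recording["response"]["body"]]
text = reassemble(chunks)[0]
assert text.startswith('The Latin name is "Sol"')
```
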
diff --git a/tests/integration/recordings/responses/c791119e6359.json b/tests/integration/recordings/responses/c791119e6359.json
new file mode 100644
index 000000000..6ac123e92
--- /dev/null
+++ b/tests/integration/recordings/responses/c791119e6359.json
@@ -0,0 +1,98 @@
+{
+ "request": {
+ "method": "POST",
+ "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
+ "headers": {},
+ "body": {
+ "model": "gpt-5-mini",
+ "messages": [
+ {
+ "role": "user",
+ "content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
+ }
+ ],
+ "stream": false,
+ "tools": [
+ {
+ "type": "function",
+ "function": {
+ "name": "get_weather",
+ "description": "Get the weather in a given city",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "city": {
+ "type": "string",
+ "description": "The city to get the weather for"
+ }
+ }
+ }
+ }
+ }
+ ]
+ },
+ "endpoint": "/v1/chat/completions",
+ "model": "gpt-5-mini"
+ },
+ "response": {
+ "body": {
+ "__type__": "openai.types.chat.chat_completion.ChatCompletion",
+ "__data__": {
+ "id": "chatcmpl-CECIwq9Odd0mOJMmw7ytv8iEazH4H",
+ "choices": [
+ {
+ "finish_reason": "tool_calls",
+ "index": 0,
+ "logprobs": null,
+ "message": {
+ "content": null,
+ "refusal": null,
+ "role": "assistant",
+ "annotations": [],
+ "audio": null,
+ "function_call": null,
+ "tool_calls": [
+ {
+ "id": "call_yw18spRc1jjUlEyabbXBhB33",
+ "function": {
+ "arguments": "{\"city\":\"Tokyo\"}",
+ "name": "get_weather"
+ },
+ "type": "function"
+ }
+ ]
+ },
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499926,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": {
+ "completion_tokens": 88,
+ "prompt_tokens": 151,
+ "total_tokens": 239,
+ "completion_tokens_details": {
+ "accepted_prediction_tokens": 0,
+ "audio_tokens": 0,
+ "reasoning_tokens": 64,
+ "rejected_prediction_tokens": 0
+ },
+ "prompt_tokens_details": {
+ "audio_tokens": 0,
+ "cached_tokens": 0
+ }
+ },
+ "prompt_filter_results": [
+ {
+ "prompt_index": 0,
+ "content_filter_results": {}
+ }
+ ]
+ }
+ },
+ "is_streaming": false
+ }
+}
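
This fixture exercises function calling: `finish_reason` is `tool_calls`, `message.content` is null, and the recorded call carries JSON-encoded arguments for `get_weather`. A sketch of how a consumer might decode and dispatch such a recorded call; the local `get_weather` stub is hypothetical and stands in for whatever the test wires up:

```python
import json


def get_weather(city: str) -> str:
    """Hypothetical local stub standing in for a real weather lookup."""
    return f"Sunny in {city}"


def dispatch_tool_calls(message: dict) -> list[str]:
    """Decode recorded tool_calls and invoke matching local functions."""
    tools = {"get_weather": get_weather}
    results = []
    for call in message["tool_calls"] or []:
        fn = tools[call["function"]["name"]]
        args = json.loads(call["function"]["arguments"])  # '{"city":"Tokyo"}'
        results.append(fn(**args))
    return results
```
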
diff --git a/tests/integration/recordings/responses/d3e27b7234e2.json b/tests/integration/recordings/responses/d3e27b7234e2.json
new file mode 100644
index 000000000..7f266c392
--- /dev/null
+++ b/tests/integration/recordings/responses/d3e27b7234e2.json
@@ -0,0 +1,2150 @@
+{
+ "request": {
+ "method": "POST",
+ "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
+ "headers": {},
+ "body": {
+ "model": "gpt-5-mini",
+ "messages": [
+ {
+ "role": "user",
+ "content": "What's the name of the Sun in latin?"
+ }
+ ],
+ "n": 2,
+ "stream": true
+ },
+ "endpoint": "/v1/chat/completions",
+ "model": "gpt-5-mini"
+ },
+ "response": {
+ "body": [
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "",
+ "choices": [],
+ "created": 0,
+ "model": "",
+ "object": "",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null,
+ "prompt_filter_results": [
+ {
+ "prompt_index": 0,
+ "content_filter_results": {}
+ }
+ ]
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "",
+ "function_call": null,
+ "refusal": null,
+ "role": "assistant",
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "In",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " Latin",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " the",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " Sun",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " is",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " called",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " \"",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "Sol",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "\"",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " (",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "sol",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": ",",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " gen",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "",
+ "function_call": null,
+ "refusal": null,
+ "role": "assistant",
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "itive",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " sol",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "is",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "The",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " Latin",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " name",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": ",",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " is",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " masculine",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " \"",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": ").",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "Sol",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "\"",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " (",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " The",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " name",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " is",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "s",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " also",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "\u014d",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " used",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "l",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " for",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " the",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "),",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " gen",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " Roman",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "itive",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " sun",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " \"",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " god",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "s",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " (",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "\u014d",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "e",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "lis",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": ".g",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "\".",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": ".,",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " ",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " Sol",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " As",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " Inv",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " an",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "ict",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " epit",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "us",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "het",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": ").",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " it",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "\u2019s",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " also",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " called",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " \"",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "Pho",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "eb",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "us",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": "\"",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " in",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": " poetry",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": ".",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": null,
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": "stop",
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIdmgM7bbQr6YefuUbY4cycibvm",
+ "choices": [
+ {
+ "delta": {
+ "content": null,
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": "stop",
+ "index": 1,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499907,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ }
+ ],
+ "is_streaming": true
+ }
+}
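
The recording above captures a streamed completion in which chunks for two choice indices (0 and 1) arrive interleaved, each delta carrying a small content fragment, with one final chunk per index carrying finish_reason "stop". As a minimal sketch of how a consumer could demultiplex such a stream — illustrative only, not part of this PR — assuming the chunks are already parsed into openai-python ChatCompletionChunk objects:

    from collections import defaultdict

    def accumulate_stream(chunks):
        # Join delta.content fragments per choice index. Chunks whose
        # choices list is empty (e.g. the Azure prompt-filter preamble
        # seen in the recordings below) simply skip the inner loop.
        texts = defaultdict(list)
        finish_reasons = {}
        for chunk in chunks:
            for choice in chunk.choices:
                if choice.delta.content:
                    texts[choice.index].append(choice.delta.content)
                if choice.finish_reason:
                    finish_reasons[choice.index] = choice.finish_reason
        return {i: "".join(parts) for i, parts in texts.items()}, finish_reasons

For the stream above this would yield two complete texts, one per choice index, without assuming any ordering between the interleaved chunks.
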
diff --git a/tests/integration/recordings/responses/fb785db7fafd.json b/tests/integration/recordings/responses/fb785db7fafd.json
new file mode 100644
index 000000000..086d211e8
--- /dev/null
+++ b/tests/integration/recordings/responses/fb785db7fafd.json
@@ -0,0 +1,310 @@
+{
+ "request": {
+ "method": "POST",
+ "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
+ "headers": {},
+ "body": {
+ "model": "gpt-5-mini",
+ "messages": [
+ {
+ "role": "user",
+ "content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
+ }
+ ],
+ "stream": true,
+ "tools": [
+ {
+ "type": "function",
+ "function": {
+ "name": "get_weather",
+ "description": "Get the weather in a given city",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "city": {
+ "type": "string",
+ "description": "The city to get the weather for"
+ }
+ }
+ }
+ }
+ }
+ ]
+ },
+ "endpoint": "/v1/chat/completions",
+ "model": "gpt-5-mini"
+ },
+ "response": {
+ "body": [
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "",
+ "choices": [],
+ "created": 0,
+ "model": "",
+ "object": "",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null,
+ "prompt_filter_results": [
+ {
+ "prompt_index": 0,
+ "content_filter_results": {}
+ }
+ ]
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
+ "choices": [
+ {
+ "delta": {
+ "content": null,
+ "function_call": null,
+ "refusal": null,
+ "role": "assistant",
+ "tool_calls": [
+ {
+ "index": 0,
+ "id": "call_TMbEoYn9q0ZKtoxav5LpD9Ts",
+ "function": {
+ "arguments": "",
+ "name": "get_weather"
+ },
+ "type": "function"
+ }
+ ]
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499912,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
+ "choices": [
+ {
+ "delta": {
+ "content": null,
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": [
+ {
+ "index": 0,
+ "id": null,
+ "function": {
+ "arguments": "{\"",
+ "name": null
+ },
+ "type": null
+ }
+ ]
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499912,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
+ "choices": [
+ {
+ "delta": {
+ "content": null,
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": [
+ {
+ "index": 0,
+ "id": null,
+ "function": {
+ "arguments": "city",
+ "name": null
+ },
+ "type": null
+ }
+ ]
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499912,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
+ "choices": [
+ {
+ "delta": {
+ "content": null,
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": [
+ {
+ "index": 0,
+ "id": null,
+ "function": {
+ "arguments": "\":\"",
+ "name": null
+ },
+ "type": null
+ }
+ ]
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499912,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
+ "choices": [
+ {
+ "delta": {
+ "content": null,
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": [
+ {
+ "index": 0,
+ "id": null,
+ "function": {
+ "arguments": "Tokyo",
+ "name": null
+ },
+ "type": null
+ }
+ ]
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499912,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
+ "choices": [
+ {
+ "delta": {
+ "content": null,
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": [
+ {
+ "index": 0,
+ "id": null,
+ "function": {
+ "arguments": "\"}",
+ "name": null
+ },
+ "type": null
+ }
+ ]
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499912,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
+ "choices": [
+ {
+ "delta": {
+ "content": null,
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": "tool_calls",
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499912,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ }
+ ],
+ "is_streaming": true
+ }
+}
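
In this recording the model's single tool call is streamed in pieces: the first delta carries the call id and function name, and the following deltas carry fragments of the JSON arguments ("{\"", "city", "\":\"", "Tokyo", "\"}") that only parse once concatenated. A minimal sketch of the reassembly a client would perform, assuming exactly one tool call at index 0 (illustrative, not part of this PR):

    import json

    def accumulate_tool_call(chunks):
        call_id, name, arg_parts = None, None, []
        for chunk in chunks:
            for choice in chunk.choices:
                for tc in choice.delta.tool_calls or []:
                    if tc.id:  # only present in the first tool-call delta
                        call_id = tc.id
                    if tc.function and tc.function.name:
                        name = tc.function.name
                    if tc.function and tc.function.arguments:
                        arg_parts.append(tc.function.arguments)
        return call_id, name, json.loads("".join(arg_parts))

Applied to the chunks above, this would return ("call_TMbEoYn9q0ZKtoxav5LpD9Ts", "get_weather", {"city": "Tokyo"}).
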
diff --git a/tests/integration/recordings/responses/ff3271401fb4.json b/tests/integration/recordings/responses/ff3271401fb4.json
new file mode 100644
index 000000000..bf7ec89f7
--- /dev/null
+++ b/tests/integration/recordings/responses/ff3271401fb4.json
@@ -0,0 +1,556 @@
+{
+ "request": {
+ "method": "POST",
+ "url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
+ "headers": {},
+ "body": {
+ "model": "gpt-5-mini",
+ "messages": [
+ {
+ "role": "user",
+ "content": "What is the name of the US captial?"
+ }
+ ],
+ "stream": true
+ },
+ "endpoint": "/v1/chat/completions",
+ "model": "gpt-5-mini"
+ },
+ "response": {
+ "body": [
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "",
+ "choices": [],
+ "created": 0,
+ "model": "",
+ "object": "",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null,
+ "prompt_filter_results": [
+ {
+ "prompt_index": 0,
+ "content_filter_results": {}
+ }
+ ]
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+ "choices": [
+ {
+ "delta": {
+ "content": "",
+ "function_call": null,
+ "refusal": null,
+ "role": "assistant",
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499916,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+ "choices": [
+ {
+ "delta": {
+ "content": "The",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499916,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+ "choices": [
+ {
+ "delta": {
+ "content": " capital",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499916,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+ "choices": [
+ {
+ "delta": {
+ "content": " of",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499916,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+ "choices": [
+ {
+ "delta": {
+ "content": " the",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499916,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+ "choices": [
+ {
+ "delta": {
+ "content": " United",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499916,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+ "choices": [
+ {
+ "delta": {
+ "content": " States",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499916,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+ "choices": [
+ {
+ "delta": {
+ "content": " is",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499916,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+ "choices": [
+ {
+ "delta": {
+ "content": " Washington",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499916,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+ "choices": [
+ {
+ "delta": {
+ "content": ",",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499916,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+ "choices": [
+ {
+ "delta": {
+ "content": " D",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499916,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+ "choices": [
+ {
+ "delta": {
+ "content": ".C",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499916,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+ "choices": [
+ {
+ "delta": {
+ "content": ".",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499916,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+ "choices": [
+ {
+ "delta": {
+ "content": " (",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499916,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+ "choices": [
+ {
+ "delta": {
+ "content": "District",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499916,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+ "choices": [
+ {
+ "delta": {
+ "content": " of",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499916,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+ "choices": [
+ {
+ "delta": {
+ "content": " Columbia",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499916,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+ "choices": [
+ {
+ "delta": {
+ "content": ").",
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": null,
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499916,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ },
+ {
+ "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
+ "__data__": {
+ "id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
+ "choices": [
+ {
+ "delta": {
+ "content": null,
+ "function_call": null,
+ "refusal": null,
+ "role": null,
+ "tool_calls": null
+ },
+ "finish_reason": "stop",
+ "index": 0,
+ "logprobs": null,
+ "content_filter_results": {}
+ }
+ ],
+ "created": 1757499916,
+ "model": "gpt-5-mini-2025-08-07",
+ "object": "chat.completion.chunk",
+ "service_tier": null,
+ "system_fingerprint": null,
+ "usage": null
+ }
+ }
+ ],
+ "is_streaming": true
+ }
+}
diff --git a/tests/integration/telemetry/test_openai_telemetry.py b/tests/integration/telemetry/test_openai_telemetry.py
index cdd9b6702..b3ffb6b09 100644
--- a/tests/integration/telemetry/test_openai_telemetry.py
+++ b/tests/integration/telemetry/test_openai_telemetry.py
@@ -49,16 +49,13 @@ def setup_openai_telemetry_data(llama_stack_client, text_model_id):
traces = llama_stack_client.telemetry.query_traces(limit=10)
if len(traces) >= 5: # 5 OpenAI completion traces
break
- time.sleep(1)
+ time.sleep(0.1)
if len(traces) < 5:
pytest.fail(
f"Failed to create sufficient OpenAI completion telemetry data after 30s. Got {len(traces)} traces."
)
- # Wait for 5 seconds to ensure traces has completed logging
- time.sleep(5)
-
yield
@@ -185,11 +182,13 @@ def test_openai_completion_creates_telemetry(llama_stack_client, text_model_id):
assert len(response.choices) > 0, "Response should have at least one choice"
# Wait for telemetry to be recorded
- time.sleep(3)
-
- # Check that we have more traces now
- final_traces = llama_stack_client.telemetry.query_traces(limit=20)
- final_count = len(final_traces)
+ start_time = time.time()
+ while time.time() - start_time < 30:
+ final_traces = llama_stack_client.telemetry.query_traces(limit=20)
+ final_count = len(final_traces)
+ if final_count > initial_count:
+ break
+ time.sleep(0.1)
# Should have at least as many traces as before (might have more due to other activity)
assert final_count >= initial_count, "Should have at least as many traces after OpenAI call"
diff --git a/tests/integration/telemetry/test_telemetry.py b/tests/integration/telemetry/test_telemetry.py
index d363edbc0..e86da954e 100644
--- a/tests/integration/telemetry/test_telemetry.py
+++ b/tests/integration/telemetry/test_telemetry.py
@@ -42,14 +42,11 @@ def setup_telemetry_data(llama_stack_client, text_model_id):
traces = llama_stack_client.telemetry.query_traces(limit=10)
if len(traces) >= 4:
break
- time.sleep(1)
+ time.sleep(0.1)
if len(traces) < 4:
pytest.fail(f"Failed to create sufficient telemetry data after 30s. Got {len(traces)} traces.")
- # Wait for 5 seconds to ensure traces has completed logging
- time.sleep(5)
-
yield
diff --git a/tests/integration/telemetry/test_telemetry_metrics.py b/tests/integration/telemetry/test_telemetry_metrics.py
index 4ba2bd2d9..1d8312ae2 100644
--- a/tests/integration/telemetry/test_telemetry_metrics.py
+++ b/tests/integration/telemetry/test_telemetry_metrics.py
@@ -46,10 +46,7 @@ def setup_telemetry_metrics_data(openai_client, client_with_models, text_model_i
break
except Exception:
pass
- time.sleep(1)
-
- # Wait additional time to ensure all metrics are processed
- time.sleep(5)
+ time.sleep(0.1)
# Return the token lists for use in tests
return {"prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens, "total_tokens": total_tokens}
diff --git a/tests/integration/tool_runtime/test_rag_tool.py b/tests/integration/tool_runtime/test_rag_tool.py
index b208500d8..b78c39af8 100644
--- a/tests/integration/tool_runtime/test_rag_tool.py
+++ b/tests/integration/tool_runtime/test_rag_tool.py
@@ -183,6 +183,110 @@ def test_vector_db_insert_from_url_and_query(
assert any("llama2" in chunk.content.lower() for chunk in response2.chunks)
+def test_rag_tool_openai_apis(client_with_empty_registry, embedding_model_id, embedding_dimension):
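+    """Insert documents in different formats (plain text, URL, data URL) and
+    verify they are stored via the OpenAI-compatible Files and vector store APIs."""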
+ vector_db_id = "test_openai_vector_db"
+
+ client_with_empty_registry.vector_dbs.register(
+ vector_db_id=vector_db_id,
+ embedding_model=embedding_model_id,
+ embedding_dimension=embedding_dimension,
+ )
+
+ available_vector_dbs = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
+ actual_vector_db_id = available_vector_dbs[0]
+
+    # Different document formats (plain text, URL, data URL) that should work with the OpenAI-compatible APIs
+ documents = [
+ Document(
+ document_id="text-doc",
+ content="This is a plain text document about machine learning algorithms.",
+ metadata={"type": "text", "category": "AI"},
+ ),
+ Document(
+ document_id="url-doc",
+ content="https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/chat.rst",
+ mime_type="text/plain",
+ metadata={"type": "url", "source": "pytorch"},
+ ),
+ Document(
+ document_id="data-url-doc",
+ content="data:text/plain;base64,VGhpcyBpcyBhIGRhdGEgVVJMIGRvY3VtZW50IGFib3V0IGRlZXAgbGVhcm5pbmcu", # "This is a data URL document about deep learning."
+ metadata={"type": "data_url", "encoding": "base64"},
+ ),
+ ]
+
+ client_with_empty_registry.tool_runtime.rag_tool.insert(
+ documents=documents,
+ vector_db_id=actual_vector_db_id,
+ chunk_size_in_tokens=256,
+ )
+
+ files_list = client_with_empty_registry.files.list()
+ assert len(files_list.data) >= len(documents), (
+ f"Expected at least {len(documents)} files, got {len(files_list.data)}"
+ )
+
+ vector_store_files = client_with_empty_registry.vector_io.openai_list_files_in_vector_store(
+ vector_store_id=actual_vector_db_id
+ )
+ assert len(vector_store_files.data) >= len(documents), f"Expected at least {len(documents)} files in vector store"
+
+ response = client_with_empty_registry.tool_runtime.rag_tool.query(
+ vector_db_ids=[actual_vector_db_id],
+ content="Tell me about machine learning and deep learning",
+ )
+
+ assert_valid_text_response(response)
+ content_text = " ".join([chunk.text for chunk in response.content]).lower()
+ assert "machine learning" in content_text or "deep learning" in content_text
+
+
+def test_rag_tool_exception_handling(client_with_empty_registry, embedding_model_id, embedding_dimension):
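+    """Verify that insertion tolerates a document with an unreachable URL and
+    that the valid documents remain queryable."""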
+ vector_db_id = "test_exception_handling"
+
+ client_with_empty_registry.vector_dbs.register(
+ vector_db_id=vector_db_id,
+ embedding_model=embedding_model_id,
+ embedding_dimension=embedding_dimension,
+ )
+
+ available_vector_dbs = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
+ actual_vector_db_id = available_vector_dbs[0]
+
+ documents = [
+ Document(
+ document_id="valid-doc",
+ content="This is a valid document that should be processed successfully.",
+ metadata={"status": "valid"},
+ ),
+ Document(
+ document_id="invalid-url-doc",
+ content="https://nonexistent-domain-12345.com/invalid.txt",
+ metadata={"status": "invalid_url"},
+ ),
+ Document(
+ document_id="another-valid-doc",
+ content="This is another valid document for testing resilience.",
+ metadata={"status": "valid"},
+ ),
+ ]
+
+ client_with_empty_registry.tool_runtime.rag_tool.insert(
+ documents=documents,
+ vector_db_id=actual_vector_db_id,
+ chunk_size_in_tokens=256,
+ )
+
+ response = client_with_empty_registry.tool_runtime.rag_tool.query(
+ vector_db_ids=[actual_vector_db_id],
+ content="valid document",
+ )
+
+ assert_valid_text_response(response)
+ content_text = " ".join([chunk.text for chunk in response.content]).lower()
+ assert "valid document" in content_text
+
+
def test_rag_tool_insert_and_query(client_with_empty_registry, embedding_model_id, embedding_dimension):
providers = [p for p in client_with_empty_registry.providers.list() if p.api == "vector_io"]
assert len(providers) > 0
@@ -249,3 +353,107 @@ def test_rag_tool_insert_and_query(client_with_empty_registry, embedding_model_i
"chunk_template": "This should raise a ValueError because it is missing the proper template variables",
},
)
+
+
+def test_rag_tool_query_generation(client_with_empty_registry, embedding_model_id, embedding_dimension):
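+    """Verify that a RAG query retrieves the semantically relevant document."""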
+ vector_db_id = "test_query_generation_db"
+
+ client_with_empty_registry.vector_dbs.register(
+ vector_db_id=vector_db_id,
+ embedding_model=embedding_model_id,
+ embedding_dimension=embedding_dimension,
+ )
+
+ available_vector_dbs = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
+ actual_vector_db_id = available_vector_dbs[0]
+
+ documents = [
+ Document(
+ document_id="ai-doc",
+ content="Artificial intelligence and machine learning are transforming technology.",
+ metadata={"category": "AI"},
+ ),
+ Document(
+ document_id="banana-doc",
+ content="Don't bring a banana to a knife fight.",
+ metadata={"category": "wisdom"},
+ ),
+ ]
+
+ client_with_empty_registry.tool_runtime.rag_tool.insert(
+ documents=documents,
+ vector_db_id=actual_vector_db_id,
+ chunk_size_in_tokens=256,
+ )
+
+ response = client_with_empty_registry.tool_runtime.rag_tool.query(
+ vector_db_ids=[actual_vector_db_id],
+ content="Tell me about AI",
+ )
+
+ assert_valid_text_response(response)
+ content_text = " ".join([chunk.text for chunk in response.content]).lower()
+ assert "artificial intelligence" in content_text or "machine learning" in content_text
+
+
+def test_rag_tool_pdf_data_url_handling(client_with_empty_registry, embedding_model_id, embedding_dimension):
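+    """Insert a PDF supplied as a base64 data URL and verify it round-trips
+    through the Files API and is queryable from the vector store."""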
+ vector_db_id = "test_pdf_data_url_db"
+
+ client_with_empty_registry.vector_dbs.register(
+ vector_db_id=vector_db_id,
+ embedding_model=embedding_model_id,
+ embedding_dimension=embedding_dimension,
+ )
+
+ available_vector_dbs = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
+ actual_vector_db_id = available_vector_dbs[0]
+
+    import base64
+
+ sample_pdf = b"%PDF-1.3\n3 0 obj\n<>\nendobj\n4 0 obj\n<>\nstream\nx\x9c\x15\xcc1\x0e\x820\x18@\xe1\x9dS\xbcM]jk$\xd5\xd5(\x83!\x86\xa1\x17\xf8\xa3\xa5`LIh+\xd7W\xc6\xf7\r\xef\xc0\xbd\xd2\xaa\xb6,\xd5\xc5\xb1o\x0c\xa6VZ\xe3znn%\xf3o\xab\xb1\xe7\xa3:Y\xdc\x8bm\xeb\xf3&1\xc8\xd7\xd3\x97\xc82\xe6\x81\x87\xe42\xcb\x87Vb(\x12<\xdd<=}Jc\x0cL\x91\xee\xda$\xb5\xc3\xbd\xd7\xe9\x0f\x8d\x97 $\nendstream\nendobj\n1 0 obj\n<>\nendobj\n5 0 obj\n<>\nendobj\n2 0 obj\n<<\n/ProcSet [/PDF /Text /ImageB /ImageC /ImageI]\n/Font <<\n/F1 5 0 R\n>>\n/XObject <<\n>>\n>>\nendobj\n6 0 obj\n<<\n/Producer (PyFPDF 1.7.2 http://pyfpdf.googlecode.com/)\n/Title (This is a sample title.)\n/Author (Llama Stack Developers)\n/CreationDate (D:20250312165548)\n>>\nendobj\n7 0 obj\n<<\n/Type /Catalog\n/Pages 1 0 R\n/OpenAction [3 0 R /FitH null]\n/PageLayout /OneColumn\n>>\nendobj\nxref\n0 8\n0000000000 65535 f \n0000000272 00000 n \n0000000455 00000 n \n0000000009 00000 n \n0000000087 00000 n \n0000000359 00000 n \n0000000559 00000 n \n0000000734 00000 n \ntrailer\n<<\n/Size 8\n/Root 7 0 R\n/Info 6 0 R\n>>\nstartxref\n837\n%%EOF\n"
+
+ pdf_base64 = base64.b64encode(sample_pdf).decode("utf-8")
+ pdf_data_url = f"data:application/pdf;base64,{pdf_base64}"
+
+ documents = [
+ Document(
+ document_id="test-pdf-data-url",
+ content=pdf_data_url,
+ metadata={"type": "pdf", "source": "data_url"},
+ ),
+ ]
+
+ client_with_empty_registry.tool_runtime.rag_tool.insert(
+ documents=documents,
+ vector_db_id=actual_vector_db_id,
+ chunk_size_in_tokens=256,
+ )
+
+ files_list = client_with_empty_registry.files.list()
+ assert len(files_list.data) >= 1, "PDF should have been uploaded to Files API"
+
+ pdf_file = None
+ for file in files_list.data:
+ if file.filename and "test-pdf-data-url" in file.filename:
+ pdf_file = file
+ break
+
+ assert pdf_file is not None, "PDF file should be found in Files API"
+ assert pdf_file.bytes == len(sample_pdf), f"File size should match original PDF ({len(sample_pdf)} bytes)"
+
+ file_content = client_with_empty_registry.files.retrieve_content(pdf_file.id)
+ assert file_content.startswith(b"%PDF-"), "Retrieved file should be a valid PDF"
+
+ vector_store_files = client_with_empty_registry.vector_io.openai_list_files_in_vector_store(
+ vector_store_id=actual_vector_db_id
+ )
+ assert len(vector_store_files.data) >= 1, "PDF should be attached to vector store"
+
+ response = client_with_empty_registry.tool_runtime.rag_tool.query(
+ vector_db_ids=[actual_vector_db_id],
+ content="sample title",
+ )
+
+ assert_valid_text_response(response)
+ content_text = " ".join([chunk.text for chunk in response.content]).lower()
+ assert "sample title" in content_text or "title" in content_text
diff --git a/tests/unit/distribution/test_inference_recordings.py b/tests/unit/distribution/test_inference_recordings.py
index c69cf319b..5740357c1 100644
--- a/tests/unit/distribution/test_inference_recordings.py
+++ b/tests/unit/distribution/test_inference_recordings.py
@@ -6,16 +6,18 @@
import tempfile
from pathlib import Path
-from unittest.mock import patch
+from unittest.mock import AsyncMock, Mock, patch
import pytest
-from openai import AsyncOpenAI
+from openai import NOT_GIVEN, AsyncOpenAI
+from openai.types.model import Model as OpenAIModel
# Import the real Pydantic response types instead of using Mocks
from llama_stack.apis.inference import (
OpenAIAssistantMessageParam,
OpenAIChatCompletion,
OpenAIChoice,
+ OpenAICompletion,
OpenAIEmbeddingData,
OpenAIEmbeddingsResponse,
OpenAIEmbeddingUsage,
@@ -153,24 +155,22 @@ class TestInferenceRecording:
async def test_recording_mode(self, temp_storage_dir, real_openai_chat_response):
"""Test that recording mode captures and stores responses."""
-
- async def mock_create(*args, **kwargs):
- return real_openai_chat_response
-
temp_storage_dir = temp_storage_dir / "test_recording_mode"
- with patch("openai.resources.chat.completions.AsyncCompletions.create", side_effect=mock_create):
- with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
- client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
+ with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
+ client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
+ client.chat.completions._post = AsyncMock(return_value=real_openai_chat_response)
- response = await client.chat.completions.create(
- model="llama3.2:3b",
- messages=[{"role": "user", "content": "Hello, how are you?"}],
- temperature=0.7,
- max_tokens=50,
- )
+ response = await client.chat.completions.create(
+ model="llama3.2:3b",
+ messages=[{"role": "user", "content": "Hello, how are you?"}],
+ temperature=0.7,
+ max_tokens=50,
+ user=NOT_GIVEN,
+ )
- # Verify the response was returned correctly
- assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."
+ # Verify the response was returned correctly
+ assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."
+ client.chat.completions._post.assert_called_once()
# Verify recording was stored
storage = ResponseStorage(temp_storage_dir)
@@ -178,40 +178,74 @@ class TestInferenceRecording:
async def test_replay_mode(self, temp_storage_dir, real_openai_chat_response):
"""Test that replay mode returns stored responses without making real calls."""
-
- async def mock_create(*args, **kwargs):
- return real_openai_chat_response
-
temp_storage_dir = temp_storage_dir / "test_replay_mode"
# First, record a response
- with patch("openai.resources.chat.completions.AsyncCompletions.create", side_effect=mock_create):
- with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
- client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
+ with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
+ client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
+ client.chat.completions._post = AsyncMock(return_value=real_openai_chat_response)
- response = await client.chat.completions.create(
- model="llama3.2:3b",
- messages=[{"role": "user", "content": "Hello, how are you?"}],
- temperature=0.7,
- max_tokens=50,
- )
+ response = await client.chat.completions.create(
+ model="llama3.2:3b",
+ messages=[{"role": "user", "content": "Hello, how are you?"}],
+ temperature=0.7,
+ max_tokens=50,
+ user=NOT_GIVEN,
+ )
+ client.chat.completions._post.assert_called_once()
# Now test replay mode - should not call the original method
- with patch("openai.resources.chat.completions.AsyncCompletions.create") as mock_create_patch:
- with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
- client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
+ with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
+ client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
+ client.chat.completions._post = AsyncMock(return_value=real_openai_chat_response)
- response = await client.chat.completions.create(
- model="llama3.2:3b",
- messages=[{"role": "user", "content": "Hello, how are you?"}],
- temperature=0.7,
- max_tokens=50,
- )
+ response = await client.chat.completions.create(
+ model="llama3.2:3b",
+ messages=[{"role": "user", "content": "Hello, how are you?"}],
+ temperature=0.7,
+ max_tokens=50,
+ )
- # Verify we got the recorded response
- assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."
+ # Verify we got the recorded response
+ assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."
- # Verify the original method was NOT called
- mock_create_patch.assert_not_called()
+ # Verify the original method was NOT called
+ client.chat.completions._post.assert_not_called()
+
+ async def test_replay_mode_models(self, temp_storage_dir):
+ """Test that replay mode returns stored responses without making real model listing calls."""
+
+ async def _async_iterator(models):
+ for model in models:
+ yield model
+
+ models = [
+ OpenAIModel(id="foo", created=1, object="model", owned_by="test"),
+ OpenAIModel(id="bar", created=2, object="model", owned_by="test"),
+ ]
+
+ expected_ids = {m.id for m in models}
+
+ temp_storage_dir = temp_storage_dir / "test_replay_mode_models"
+
+ # baseline - mock works without recording
+ client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
+ client.models._get_api_list = Mock(return_value=_async_iterator(models))
+ assert {m.id async for m in client.models.list()} == expected_ids
+ client.models._get_api_list.assert_called_once()
+
+ # record the call
+ with inference_recording(mode=InferenceMode.RECORD, storage_dir=temp_storage_dir):
+ client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
+ client.models._get_api_list = Mock(return_value=_async_iterator(models))
+ assert {m.id async for m in client.models.list()} == expected_ids
+ client.models._get_api_list.assert_called_once()
+
+ # replay the call
+ with inference_recording(mode=InferenceMode.REPLAY, storage_dir=temp_storage_dir):
+ client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
+ client.models._get_api_list = Mock(return_value=_async_iterator(models))
+ assert {m.id async for m in client.models.list()} == expected_ids
+ client.models._get_api_list.assert_not_called()
async def test_replay_missing_recording(self, temp_storage_dir):
"""Test that replay mode fails when no recording is found."""
@@ -228,36 +262,110 @@ class TestInferenceRecording:
async def test_embeddings_recording(self, temp_storage_dir, real_embeddings_response):
"""Test recording and replay of embeddings calls."""
- async def mock_create(*args, **kwargs):
- return real_embeddings_response
+ # baseline - mock works without recording
+ client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
+ client.embeddings._post = AsyncMock(return_value=real_embeddings_response)
+ response = await client.embeddings.create(
+ model=real_embeddings_response.model,
+ input=["Hello world", "Test embedding"],
+ encoding_format=NOT_GIVEN,
+ )
+ assert len(response.data) == 2
+ assert response.data[0].embedding == [0.1, 0.2, 0.3]
+ client.embeddings._post.assert_called_once()
temp_storage_dir = temp_storage_dir / "test_embeddings_recording"
# Record
- with patch("openai.resources.embeddings.AsyncEmbeddings.create", side_effect=mock_create):
- with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
- client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
+ with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
+ client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
+ client.embeddings._post = AsyncMock(return_value=real_embeddings_response)
- response = await client.embeddings.create(
- model="nomic-embed-text", input=["Hello world", "Test embedding"]
- )
+ response = await client.embeddings.create(
+ model=real_embeddings_response.model,
+ input=["Hello world", "Test embedding"],
+ encoding_format=NOT_GIVEN,
+ dimensions=NOT_GIVEN,
+ user=NOT_GIVEN,
+ )
- assert len(response.data) == 2
+ assert len(response.data) == 2
# Replay
- with patch("openai.resources.embeddings.AsyncEmbeddings.create") as mock_create_patch:
- with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
- client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
+ with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
+ client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
+ client.embeddings._post = AsyncMock(return_value=real_embeddings_response)
- response = await client.embeddings.create(
- model="nomic-embed-text", input=["Hello world", "Test embedding"]
- )
+ response = await client.embeddings.create(
+ model=real_embeddings_response.model,
+ input=["Hello world", "Test embedding"],
+ )
- # Verify we got the recorded response
- assert len(response.data) == 2
- assert response.data[0].embedding == [0.1, 0.2, 0.3]
+ # Verify we got the recorded response
+ assert len(response.data) == 2
+ assert response.data[0].embedding == [0.1, 0.2, 0.3]
- # Verify original method was not called
- mock_create_patch.assert_not_called()
+ # Verify original method was not called
+ client.embeddings._post.assert_not_called()
+
+ async def test_completions_recording(self, temp_storage_dir):
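+        """Test recording and replay of completions calls."""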
+ real_completions_response = OpenAICompletion(
+ id="test_completion",
+ object="text_completion",
+ created=1234567890,
+ model="llama3.2:3b",
+ choices=[
+ {
+ "text": "Hello! I'm doing well, thank you for asking.",
+ "index": 0,
+ "logprobs": None,
+ "finish_reason": "stop",
+ }
+ ],
+ )
+
+ temp_storage_dir = temp_storage_dir / "test_completions_recording"
+
+ # baseline - mock works without recording
+ client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
+ client.completions._post = AsyncMock(return_value=real_completions_response)
+ response = await client.completions.create(
+ model=real_completions_response.model,
+ prompt="Hello, how are you?",
+ temperature=0.7,
+ max_tokens=50,
+ user=NOT_GIVEN,
+ )
+ assert response.choices[0].text == real_completions_response.choices[0].text
+ client.completions._post.assert_called_once()
+
+ # Record
+ with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
+ client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
+ client.completions._post = AsyncMock(return_value=real_completions_response)
+
+ response = await client.completions.create(
+ model=real_completions_response.model,
+ prompt="Hello, how are you?",
+ temperature=0.7,
+ max_tokens=50,
+ user=NOT_GIVEN,
+ )
+
+ assert response.choices[0].text == real_completions_response.choices[0].text
+ client.completions._post.assert_called_once()
+
+ # Replay
+ with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
+ client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
+ client.completions._post = AsyncMock(return_value=real_completions_response)
+ response = await client.completions.create(
+ model=real_completions_response.model,
+ prompt="Hello, how are you?",
+ temperature=0.7,
+ max_tokens=50,
+ )
+ assert response.choices[0].text == real_completions_response.choices[0].text
+ client.completions._post.assert_not_called()
async def test_live_mode(self, real_openai_chat_response):
"""Test that live mode passes through to original methods."""
diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py
index ce0e930b1..61b16b5d1 100644
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@@ -6,19 +6,15 @@
import asyncio
import json
-import logging # allow-direct-logging
-import threading
import time
-from http.server import BaseHTTPRequestHandler, HTTPServer
-from typing import Any
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch
import pytest
from openai.types.chat.chat_completion_chunk import (
ChatCompletionChunk as OpenAIChatCompletionChunk,
)
from openai.types.chat.chat_completion_chunk import (
- Choice as OpenAIChoice,
+ Choice as OpenAIChoiceChunk,
)
from openai.types.chat.chat_completion_chunk import (
ChoiceDelta as OpenAIChoiceDelta,
@@ -35,6 +31,9 @@ from llama_stack.apis.inference import (
ChatCompletionRequest,
ChatCompletionResponseEventType,
CompletionMessage,
+ OpenAIAssistantMessageParam,
+ OpenAIChatCompletion,
+ OpenAIChoice,
SystemMessage,
ToolChoice,
ToolConfig,
@@ -61,41 +60,6 @@ from llama_stack.providers.remote.inference.vllm.vllm import (
# -v -s --tb=short --disable-warnings
-class MockInferenceAdapterWithSleep:
- def __init__(self, sleep_time: int, response: dict[str, Any]):
- self.httpd = None
-
- class DelayedRequestHandler(BaseHTTPRequestHandler):
- # ruff: noqa: N802
- def do_POST(self):
- time.sleep(sleep_time)
- response_body = json.dumps(response).encode("utf-8")
- self.send_response(code=200)
- self.send_header("Content-Type", "application/json")
- self.send_header("Content-Length", len(response_body))
- self.end_headers()
- self.wfile.write(response_body)
-
- self.request_handler = DelayedRequestHandler
-
- def __enter__(self):
- httpd = HTTPServer(("", 0), self.request_handler)
- self.httpd = httpd
- host, port = httpd.server_address
- httpd_thread = threading.Thread(target=httpd.serve_forever)
- httpd_thread.daemon = True # stop server if this thread terminates
- httpd_thread.start()
-
- config = VLLMInferenceAdapterConfig(url=f"http://{host}:{port}")
- inference_adapter = VLLMInferenceAdapter(config)
- return inference_adapter
-
- def __exit__(self, _exc_type, _exc_value, _traceback):
- if self.httpd:
- self.httpd.shutdown()
- self.httpd.server_close()
-
-
@pytest.fixture(scope="module")
def mock_openai_models_list():
with patch("openai.resources.models.AsyncModels.list", new_callable=AsyncMock) as mock_list:
@@ -150,10 +114,12 @@ async def test_tool_call_response(vllm_inference_adapter):
"""Verify that tool call arguments from a CompletionMessage are correctly converted
into the expected JSON format."""
- # Patch the call to vllm so we can inspect the arguments sent were correct
- with patch.object(
- vllm_inference_adapter.client.chat.completions, "create", new_callable=AsyncMock
- ) as mock_nonstream_completion:
+ # Patch the client property to avoid instantiating a real AsyncOpenAI client
+ with patch.object(VLLMInferenceAdapter, "client", new_callable=PropertyMock) as mock_create_client:
+ mock_client = MagicMock()
+ mock_client.chat.completions.create = AsyncMock()
+ mock_create_client.return_value = mock_client
+
messages = [
SystemMessage(content="You are a helpful assistant"),
UserMessage(content="How many?"),
@@ -179,7 +145,7 @@ async def test_tool_call_response(vllm_inference_adapter):
tool_config=ToolConfig(tool_choice=ToolChoice.auto),
)
- assert mock_nonstream_completion.call_args.kwargs["messages"][2]["tool_calls"] == [
+ assert mock_client.chat.completions.create.call_args.kwargs["messages"][2]["tool_calls"] == [
{
"id": "foo",
"type": "function",
@@ -199,7 +165,7 @@ async def test_tool_call_delta_empty_tool_call_buf():
async def mock_stream():
delta = OpenAIChoiceDelta(content="", tool_calls=None)
- choices = [OpenAIChoice(delta=delta, finish_reason="stop", index=0)]
+ choices = [OpenAIChoiceChunk(delta=delta, finish_reason="stop", index=0)]
mock_chunk = OpenAIChatCompletionChunk(
id="chunk-1",
created=1,
@@ -225,7 +191,7 @@ async def test_tool_call_delta_streaming_arguments_dict():
model="foo",
object="chat.completion.chunk",
choices=[
- OpenAIChoice(
+ OpenAIChoiceChunk(
delta=OpenAIChoiceDelta(
content="",
tool_calls=[
@@ -250,7 +216,7 @@ async def test_tool_call_delta_streaming_arguments_dict():
model="foo",
object="chat.completion.chunk",
choices=[
- OpenAIChoice(
+ OpenAIChoiceChunk(
delta=OpenAIChoiceDelta(
content="",
tool_calls=[
@@ -275,7 +241,9 @@ async def test_tool_call_delta_streaming_arguments_dict():
model="foo",
object="chat.completion.chunk",
choices=[
- OpenAIChoice(delta=OpenAIChoiceDelta(content="", tool_calls=None), finish_reason="tool_calls", index=0)
+ OpenAIChoiceChunk(
+ delta=OpenAIChoiceDelta(content="", tool_calls=None), finish_reason="tool_calls", index=0
+ )
],
)
for chunk in [mock_chunk_1, mock_chunk_2, mock_chunk_3]:
@@ -299,7 +267,7 @@ async def test_multiple_tool_calls():
model="foo",
object="chat.completion.chunk",
choices=[
- OpenAIChoice(
+ OpenAIChoiceChunk(
delta=OpenAIChoiceDelta(
content="",
tool_calls=[
@@ -324,7 +292,7 @@ async def test_multiple_tool_calls():
model="foo",
object="chat.completion.chunk",
choices=[
- OpenAIChoice(
+ OpenAIChoiceChunk(
delta=OpenAIChoiceDelta(
content="",
tool_calls=[
@@ -349,7 +317,9 @@ async def test_multiple_tool_calls():
model="foo",
object="chat.completion.chunk",
choices=[
- OpenAIChoice(delta=OpenAIChoiceDelta(content="", tool_calls=None), finish_reason="tool_calls", index=0)
+ OpenAIChoiceChunk(
+ delta=OpenAIChoiceDelta(content="", tool_calls=None), finish_reason="tool_calls", index=0
+ )
],
)
for chunk in [mock_chunk_1, mock_chunk_2, mock_chunk_3]:
@@ -393,59 +363,6 @@ async def test_process_vllm_chat_completion_stream_response_no_choices():
assert chunks[0].event.event_type.value == "start"
-@pytest.mark.allow_network
-def test_chat_completion_doesnt_block_event_loop(caplog):
- loop = asyncio.new_event_loop()
- loop.set_debug(True)
- caplog.set_level(logging.WARNING)
-
- # Log when event loop is blocked for more than 200ms
- loop.slow_callback_duration = 0.5
- # Sleep for 500ms in our delayed http response
- sleep_time = 0.5
-
- mock_model = Model(identifier="mock-model", provider_resource_id="mock-model", provider_id="vllm-inference")
- mock_response = {
- "id": "chatcmpl-abc123",
- "object": "chat.completion",
- "created": 1,
- "modle": "mock-model",
- "choices": [
- {
- "message": {"content": ""},
- "logprobs": None,
- "finish_reason": "stop",
- "index": 0,
- }
- ],
- }
-
- async def do_chat_completion():
- await inference_adapter.chat_completion(
- "mock-model",
- [],
- stream=False,
- tools=None,
- tool_config=ToolConfig(tool_choice=ToolChoice.auto),
- )
-
- with MockInferenceAdapterWithSleep(sleep_time, mock_response) as inference_adapter:
- inference_adapter.model_store = AsyncMock()
- inference_adapter.model_store.get_model.return_value = mock_model
- loop.run_until_complete(inference_adapter.initialize())
-
- # Clear the logs so far and run the actual chat completion we care about
- caplog.clear()
- loop.run_until_complete(do_chat_completion())
-
- # Ensure we don't have any asyncio warnings in the captured log
- # records from our chat completion call. A message gets logged
- # here any time we exceed the slow_callback_duration configured
- # above.
- asyncio_warnings = [record.message for record in caplog.records if record.name == "asyncio"]
- assert not asyncio_warnings
-
-
async def test_get_params_empty_tools(vllm_inference_adapter):
request = ChatCompletionRequest(
tools=[],
@@ -641,9 +558,7 @@ async def test_health_status_success(vllm_inference_adapter):
This test verifies that the health method returns a HealthResponse with status OK, only
when the connection to the vLLM server is successful.
"""
- # Set vllm_inference_adapter.client to None to ensure _create_client is called
- vllm_inference_adapter.client = None
- with patch.object(vllm_inference_adapter, "_create_client") as mock_create_client:
+ with patch.object(VLLMInferenceAdapter, "client", new_callable=PropertyMock) as mock_create_client:
# Create mock client and models
mock_client = MagicMock()
mock_models = MagicMock()
@@ -674,8 +589,7 @@ async def test_health_status_failure(vllm_inference_adapter):
This test verifies that the health method returns a HealthResponse with status ERROR
and an appropriate error message when the connection to the vLLM server fails.
"""
- vllm_inference_adapter.client = None
- with patch.object(vllm_inference_adapter, "_create_client") as mock_create_client:
+ with patch.object(VLLMInferenceAdapter, "client", new_callable=PropertyMock) as mock_create_client:
# Create mock client and models
mock_client = MagicMock()
mock_models = MagicMock()
@@ -697,3 +611,48 @@ async def test_health_status_failure(vllm_inference_adapter):
assert "Health check failed: Connection failed" in health_response["message"]
mock_models.list.assert_called_once()
+
+
+async def test_openai_chat_completion_is_async(vllm_inference_adapter):
+ """
+ Verify that openai_chat_completion is async and doesn't block the event loop.
+
+ To do this we mock the underlying inference with a sleep, start multiple
+ inference calls in parallel, and ensure the total time taken is less
+ than the sum of the individual sleep times.
+ """
+ sleep_time = 0.5
+
+ async def mock_create(*args, **kwargs):
+ await asyncio.sleep(sleep_time)
+ return OpenAIChatCompletion(
+ id="chatcmpl-abc123",
+ created=1,
+ model="mock-model",
+ choices=[
+ OpenAIChoice(
+ message=OpenAIAssistantMessageParam(
+ content="nothing interesting",
+ ),
+ finish_reason="stop",
+ index=0,
+ )
+ ],
+ )
+
+ async def do_inference():
+ await vllm_inference_adapter.openai_chat_completion(
+ "mock-model", messages=["one fish", "two fish"], stream=False
+ )
+
+ with patch.object(VLLMInferenceAdapter, "client", new_callable=PropertyMock) as mock_create_client:
+ mock_client = MagicMock()
+ mock_client.chat.completions.create = AsyncMock(side_effect=mock_create)
+ mock_create_client.return_value = mock_client
+
+ start_time = time.time()
+ await asyncio.gather(do_inference(), do_inference(), do_inference(), do_inference())
+ total_time = time.time() - start_time
+
+ assert mock_create_client.call_count == 4 # no cheating
+ assert total_time < (sleep_time * 2), f"Total time taken: {total_time}s exceeded expected max"
diff --git a/tests/unit/providers/test_bedrock.py b/tests/unit/providers/test_bedrock.py
new file mode 100644
index 000000000..1ff07bbbe
--- /dev/null
+++ b/tests/unit/providers/test_bedrock.py
@@ -0,0 +1,53 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.providers.remote.inference.bedrock.bedrock import (
+ _get_region_prefix,
+ _to_inference_profile_id,
+)
+
+
+def test_region_prefixes():
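+    """Regions map to us./eu./ap. prefixes; unmapped or missing regions default to us."""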
+ assert _get_region_prefix("us-east-1") == "us."
+ assert _get_region_prefix("eu-west-1") == "eu."
+ assert _get_region_prefix("ap-south-1") == "ap."
+ assert _get_region_prefix("ca-central-1") == "us."
+
+    # Test case-insensitive region matching
+ assert _get_region_prefix("US-EAST-1") == "us."
+ assert _get_region_prefix("EU-WEST-1") == "eu."
+ assert _get_region_prefix("Ap-South-1") == "ap."
+
+ # Test None region
+ assert _get_region_prefix(None) == "us."
+
+
+def test_model_id_conversion():
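+    """Model IDs gain a region prefix; ARNs and already-prefixed IDs are returned unchanged."""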
+ # Basic conversion
+ assert (
+ _to_inference_profile_id("meta.llama3-1-70b-instruct-v1:0", "us-east-1") == "us.meta.llama3-1-70b-instruct-v1:0"
+ )
+
+ # Already has prefix
+ assert (
+ _to_inference_profile_id("us.meta.llama3-1-70b-instruct-v1:0", "us-east-1")
+ == "us.meta.llama3-1-70b-instruct-v1:0"
+ )
+
+ # ARN should be returned unchanged
+ arn = "arn:aws:bedrock:us-east-1:123456789012:inference-profile/us.meta.llama3-1-70b-instruct-v1:0"
+ assert _to_inference_profile_id(arn, "us-east-1") == arn
+
+ # ARN should be returned unchanged even without region
+ assert _to_inference_profile_id(arn) == arn
+
+ # Optional region parameter defaults to us-east-1
+ assert _to_inference_profile_id("meta.llama3-1-70b-instruct-v1:0") == "us.meta.llama3-1-70b-instruct-v1:0"
+
+ # Different regions work with optional parameter
+ assert (
+ _to_inference_profile_id("meta.llama3-1-70b-instruct-v1:0", "eu-west-1") == "eu.meta.llama3-1-70b-instruct-v1:0"
+ )
diff --git a/tests/unit/providers/utils/memory/test_vector_store.py b/tests/unit/providers/utils/memory/test_vector_store.py
index 90b229262..590bdd1d2 100644
--- a/tests/unit/providers/utils/memory/test_vector_store.py
+++ b/tests/unit/providers/utils/memory/test_vector_store.py
@@ -178,3 +178,41 @@ def test_content_from_data_and_mime_type_both_encodings_fail():
# Should raise an exception instead of returning empty string
with pytest.raises(UnicodeDecodeError):
content_from_data_and_mime_type(data, mime_type)
+
+
+async def test_memory_tool_error_handling():
+ """Test that memory tool handles various failures gracefully without crashing."""
+ from llama_stack.providers.inline.tool_runtime.rag.config import RagToolRuntimeConfig
+ from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl
+
+ config = RagToolRuntimeConfig()
+ memory_tool = MemoryToolRuntimeImpl(
+ config=config,
+ vector_io_api=AsyncMock(),
+ inference_api=AsyncMock(),
+ files_api=AsyncMock(),
+ )
+
+ docs = [
+ RAGDocument(document_id="good_doc", content="Good content", metadata={}),
+ RAGDocument(document_id="bad_url_doc", content=URL(uri="https://bad.url"), metadata={}),
+ RAGDocument(document_id="another_good_doc", content="Another good content", metadata={}),
+ ]
+
+ mock_file1 = MagicMock()
+ mock_file1.id = "file_good1"
+ mock_file2 = MagicMock()
+ mock_file2.id = "file_good2"
+ memory_tool.files_api.openai_upload_file.side_effect = [mock_file1, mock_file2]
+
+ with patch("httpx.AsyncClient") as mock_client:
+ mock_instance = AsyncMock()
+ mock_instance.get.side_effect = Exception("Bad URL")
+ mock_client.return_value.__aenter__.return_value = mock_instance
+
+ # won't raise exception despite one document failing
+ await memory_tool.insert(docs, "vector_store_123")
+
+ # processed 2 documents successfully, skipped 1
+ assert memory_tool.files_api.openai_upload_file.call_count == 2
+ assert memory_tool.vector_io_api.openai_attach_file_to_vector_store.call_count == 2
diff --git a/tests/unit/providers/vector_io/test_vector_utils.py b/tests/unit/providers/vector_io/test_vector_utils.py
index a5d803a82..10ebe5bfb 100644
--- a/tests/unit/providers/vector_io/test_vector_utils.py
+++ b/tests/unit/providers/vector_io/test_vector_utils.py
@@ -26,9 +26,9 @@ def test_generate_chunk_id():
chunk_ids = sorted([chunk.chunk_id for chunk in chunks])
assert chunk_ids == [
- "177a1368-f6a8-0c50-6e92-18677f2c3de3",
- "bc744db3-1b25-0a9c-cdff-b6ba3df73c36",
- "f68df25d-d9aa-ab4d-5684-64a233add20d",
+ "31d1f9a3-c8d2-66e7-3c37-af2acd329778",
+ "d07dade7-29c0-cda7-df29-0249a1dcbc3e",
+ "d14f75a1-5855-7f72-2c78-d9fc4275a346",
]
@@ -36,14 +36,14 @@ def test_generate_chunk_id_with_window():
chunk = Chunk(content="test", metadata={"document_id": "doc-1"})
chunk_id1 = generate_chunk_id("doc-1", chunk, chunk_window="0-1")
chunk_id2 = generate_chunk_id("doc-1", chunk, chunk_window="1-2")
- assert chunk_id1 == "149018fe-d0eb-0f8d-5f7f-726bdd2aeedb"
- assert chunk_id2 == "4562c1ee-9971-1f3b-51a6-7d05e5211154"
+ assert chunk_id1 == "8630321a-d9cb-2bb6-cd28-ebf68dafd866"
+ assert chunk_id2 == "13a1c09a-cbda-b61a-2d1a-7baa90888685"
def test_chunk_id():
# Test with existing chunk ID
chunk_with_id = Chunk(content="test", metadata={"document_id": "existing-id"})
- assert chunk_with_id.chunk_id == "84ededcc-b80b-a83e-1a20-ca6515a11350"
+ assert chunk_with_id.chunk_id == "11704f92-42b6-61df-bf85-6473e7708fbd"
# Test with document ID in metadata
chunk_with_doc_id = Chunk(content="test", metadata={"document_id": "doc-1"})
diff --git a/tests/unit/utils/inference/test_inference_store.py b/tests/unit/utils/inference/test_inference_store.py
index 730f54a05..f6d63490a 100644
--- a/tests/unit/utils/inference/test_inference_store.py
+++ b/tests/unit/utils/inference/test_inference_store.py
@@ -65,6 +65,9 @@ async def test_inference_store_pagination_basic():
input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
await store.store_chat_completion(completion, input_messages)
+ # Wait for all queued writes to complete
+ await store.flush()
+
# Test 1: First page with limit=2, descending order (default)
result = await store.list_chat_completions(limit=2, order=Order.desc)
assert len(result.data) == 2
@@ -108,6 +111,9 @@ async def test_inference_store_pagination_ascending():
input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
await store.store_chat_completion(completion, input_messages)
+ # Wait for all queued writes to complete
+ await store.flush()
+
# Test ascending order pagination
result = await store.list_chat_completions(limit=1, order=Order.asc)
assert len(result.data) == 1
@@ -143,6 +149,9 @@ async def test_inference_store_pagination_with_model_filter():
input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
await store.store_chat_completion(completion, input_messages)
+ # Wait for all queued writes to complete
+ await store.flush()
+
# Test pagination with model filter
result = await store.list_chat_completions(limit=1, model="model-a", order=Order.desc)
assert len(result.data) == 1
@@ -190,6 +199,9 @@ async def test_inference_store_pagination_no_limit():
input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
await store.store_chat_completion(completion, input_messages)
+ # Wait for all queued writes to complete
+ await store.flush()
+
# Test without limit
result = await store.list_chat_completions(order=Order.desc)
assert len(result.data) == 2
diff --git a/uv.lock b/uv.lock
index 2788c6fef..065eb3876 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
version = 1
-revision = 3
+revision = 2
requires-python = ">=3.12"
resolution-markers = [
"(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",
@@ -1839,7 +1839,6 @@ test = [
{ name = "datasets" },
{ name = "mcp" },
{ name = "milvus-lite" },
- { name = "openai" },
{ name = "psycopg2-binary" },
{ name = "pymilvus" },
{ name = "pypdf" },
@@ -1865,7 +1864,6 @@ unit = [
{ name = "milvus-lite" },
{ name = "moto", extra = ["s3"] },
{ name = "ollama" },
- { name = "openai" },
{ name = "psycopg2-binary" },
{ name = "pymilvus" },
{ name = "pypdf" },
@@ -1889,7 +1887,7 @@ requires-dist = [
{ name = "jsonschema" },
{ name = "llama-stack-client", specifier = ">=0.2.21" },
{ name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.21" },
- { name = "openai", specifier = ">=1.99.6" },
+ { name = "openai", specifier = ">=1.100.0" },
{ name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
{ name = "opentelemetry-sdk", specifier = ">=1.30.0" },
{ name = "pandas", marker = "extra == 'ui'" },
@@ -1959,7 +1957,6 @@ test = [
{ name = "datasets", specifier = ">=4.0.0" },
{ name = "mcp" },
{ name = "milvus-lite", specifier = ">=2.5.0" },
- { name = "openai", specifier = ">=1.100.0" },
{ name = "psycopg2-binary", specifier = ">=2.9.0" },
{ name = "pymilvus", specifier = ">=2.6.1" },
{ name = "pypdf" },
@@ -1984,7 +1981,6 @@ unit = [
{ name = "milvus-lite", specifier = ">=2.5.0" },
{ name = "moto", extras = ["s3"], specifier = ">=5.1.10" },
{ name = "ollama" },
- { name = "openai" },
{ name = "psycopg2-binary", specifier = ">=2.9.0" },
{ name = "pymilvus", specifier = ">=2.6.1" },
{ name = "pypdf" },
@@ -2023,7 +2019,7 @@ wheels = [
[[package]]
name = "locust"
-version = "2.39.1"
+version = "2.40.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "configargparse" },
@@ -2035,6 +2031,7 @@ dependencies = [
{ name = "locust-cloud" },
{ name = "msgpack" },
{ name = "psutil" },
+ { name = "pytest" },
{ name = "python-engineio" },
{ name = "python-socketio", extra = ["client"] },
{ name = "pywin32", marker = "sys_platform == 'win32'" },
@@ -2043,9 +2040,9 @@ dependencies = [
{ name = "setuptools" },
{ name = "werkzeug" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/95/c8/10aa5445c404eed389b56877e6714c1787190cc09dd70059ce3765979ec5/locust-2.39.1.tar.gz", hash = "sha256:6bdd19e27edf9a1c84391d6cf6e9a737dfb832be7dfbf39053191ae31b9cc498", size = 1409902, upload-time = "2025-08-29T17:41:01.544Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/01/22/82f40176473a98c9479bed667d3ad21bb859d2cb67f6880a6b0b6a725e45/locust-2.40.1.tar.gz", hash = "sha256:5bde76c1cf7e412071670f926f34844e119210c93f07a4cf9fc4cb93c60a578a", size = 1411606, upload-time = "2025-09-05T15:57:35.76Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/ec/b3/b2f4b2ca88b1e72eba7be2b2982533b887f8b709d222db78eb9602aa5121/locust-2.39.1-py3-none-any.whl", hash = "sha256:fd5148f2f1a4ed34aee968abc4393674e69d1b5e1b54db50a397f6eb09ce0b04", size = 1428155, upload-time = "2025-08-29T17:41:00.245Z" },
+ { url = "https://files.pythonhosted.org/packages/3b/e6/9c6335ab16becf4f8ad3da6083ab78793c56ec1ca496d6f7c74660c21c3f/locust-2.40.1-py3-none-any.whl", hash = "sha256:ef0517f9bb5ed0afa7035014faaf944802917e07da8649461aaaf5e5f3ba8a65", size = 1430154, upload-time = "2025-09-05T15:57:33.233Z" },
]
[[package]]
@@ -2619,7 +2616,7 @@ wheels = [
[[package]]
name = "openai"
-version = "1.102.0"
+version = "1.107.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@@ -2631,9 +2628,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "typing-extensions" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/07/55/da5598ed5c6bdd9939633854049cddc5cbac0da938dfcfcb3c6b119c16c0/openai-1.102.0.tar.gz", hash = "sha256:2e0153bcd64a6523071e90211cbfca1f2bbc5ceedd0993ba932a5869f93b7fc9", size = 519027, upload-time = "2025-08-26T20:50:29.397Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/88/67/d6498de300f83ff57a79cb7aa96ef3bef8d6f070c3ded0f1b5b45442a6bc/openai-1.107.0.tar.gz", hash = "sha256:43e04927584e57d0e9e640ee0077c78baf8150098be96ebd5c512539b6c4e9a4", size = 566056, upload-time = "2025-09-08T19:25:47.604Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/bd/0d/c9e7016d82c53c5b5e23e2bad36daebb8921ed44f69c0a985c6529a35106/openai-1.102.0-py3-none-any.whl", hash = "sha256:d751a7e95e222b5325306362ad02a7aa96e1fab3ed05b5888ce1c7ca63451345", size = 812015, upload-time = "2025-08-26T20:50:27.219Z" },
+ { url = "https://files.pythonhosted.org/packages/91/ed/e8a4fd20390f2858b95227c288df8fe0c835f7c77625f7583609161684ba/openai-1.107.0-py3-none-any.whl", hash = "sha256:3dcfa3cbb116bd6924b27913b8da28c4a787379ff60049588547a1013e6d6438", size = 950968, upload-time = "2025-09-08T19:25:45.552Z" },
]
[[package]]
@@ -3540,7 +3537,7 @@ wheels = [
[[package]]
name = "pytest"
-version = "8.4.1"
+version = "8.4.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
@@ -3549,9 +3546,9 @@ dependencies = [
{ name = "pluggy" },
{ name = "pygments" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/08/ba/45911d754e8eba3d5a841a5ce61a65a685ff1798421ac054f85aa8747dfb/pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c", size = 1517714, upload-time = "2025-06-18T05:48:06.109Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474, upload-time = "2025-06-18T05:48:03.955Z" },
+ { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" },
]
[[package]]