Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-10-04 04:04:14 +00:00

Merge branch 'main' into use-openai-for-ollama
Commit 91fb6f42cb
74 changed files with 8761 additions and 971 deletions

.github/workflows/conformance.yml (vendored): 24 lines changed
@@ -13,11 +13,8 @@ on:
branches: [ main ]
types: [opened, synchronize, reopened]
paths:
- 'llama_stack/**'
- '!llama_stack/ui/**'
- 'tests/**'
- 'uv.lock'
- 'pyproject.toml'
- 'docs/_static/llama-stack-spec.yaml'
- 'docs/_static/llama-stack-spec.html'
- '.github/workflows/conformance.yml' # This workflow itself

concurrency:

@@ -43,10 +40,27 @@ jobs:
ref: ${{ github.event.pull_request.base.ref }}
path: 'base'

# Cache oasdiff to avoid checksum failures and speed up builds
- name: Cache oasdiff
id: cache-oasdiff
uses: actions/cache@0400d5f644dc74513175e3cd8d07132dd4860809
with:
path: ~/oasdiff
key: oasdiff-${{ runner.os }}

# Install oasdiff: https://github.com/oasdiff/oasdiff, a tool for detecting breaking changes in OpenAPI specs.
- name: Install oasdiff
if: steps.cache-oasdiff.outputs.cache-hit != 'true'
run: |
curl -fsSL https://raw.githubusercontent.com/oasdiff/oasdiff/main/install.sh | sh
cp /usr/local/bin/oasdiff ~/oasdiff

# Setup cached oasdiff
- name: Setup cached oasdiff
if: steps.cache-oasdiff.outputs.cache-hit == 'true'
run: |
sudo cp ~/oasdiff /usr/local/bin/oasdiff
sudo chmod +x /usr/local/bin/oasdiff

# Run oasdiff to detect breaking changes in the API specification
# This step will fail if incompatible changes are detected, preventing breaking changes from being merged
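The run step itself is truncated in this hunk. As a rough, hedged sketch of how oasdiff is commonly invoked to compare a base spec against a PR spec (the paths and flags below are illustrative, not taken from the workflow):

```bash
# Illustrative invocation only; the workflow's actual step is not shown above.
# Compare the base branch's spec (checked out under ./base) with the PR's spec
# and fail if oasdiff reports breaking changes.
oasdiff breaking \
  base/docs/_static/llama-stack-spec.yaml \
  docs/_static/llama-stack-spec.yaml \
  --fail-on ERR
```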
.github/workflows/pre-commit.yml (vendored): 24 lines changed
@@ -47,11 +47,21 @@ jobs:
run: npm ci
working-directory: llama_stack/ui

- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
- name: Run pre-commit
id: precommit
uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
continue-on-error: true
env:
SKIP: no-commit-to-branch
RUFF_OUTPUT_FORMAT: github

- name: Check pre-commit results
if: steps.precommit.outcome == 'failure'
run: |
echo "::error::Pre-commit hooks failed. Please run 'pre-commit run --all-files' locally and commit the fixes."
echo "::warning::Some pre-commit hooks failed. Check the output above for details."
exit 1

- name: Debug
run: |
echo "github.ref: ${{ github.ref }}"

@@ -79,17 +89,23 @@
echo "No changes to commit"
fi

- name: Verify if there are any diff files after pre-commit
- name: Verify no uncommitted changes
if: github.actor != 'dependabot[bot]'
run: |
git diff --exit-code || (echo "There are uncommitted changes, run pre-commit locally and commit again" && exit 1)
if ! git diff --exit-code; then
echo "::error::There are uncommitted changes after pre-commit. Please run 'pre-commit run --all-files' locally and commit the fixes."
echo "::warning::Files with changes:"
git diff --name-status
exit 1
fi

- name: Verify if there are any new files after pre-commit
if: github.actor != 'dependabot[bot]'
run: |
unstaged_files=$(git ls-files --others --exclude-standard)
if [ -n "$unstaged_files" ]; then
echo "There are uncommitted new files, run pre-commit locally and commit again"
echo "::error::There are new untracked files after pre-commit. Please run 'pre-commit run --all-files' locally and commit the fixes."
echo "::warning::New files:"
echo "$unstaged_files"
exit 1
fi
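The error messages above already spell out the local fix; a minimal version of that loop looks like this (the commit message is just a placeholder):

```bash
# Run the same hooks the workflow runs, then commit whatever they changed.
pre-commit run --all-files
git add -A
git commit -m "Apply pre-commit fixes"  # placeholder message
```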
@@ -34,13 +34,12 @@ This data enables data-driven architectural decisions and performance optimizati

**1. Deploy base k8s infrastructure:**
```bash
cd ../k8s
cd ../../docs/source/distributions/k8s
./apply.sh
```

**2. Deploy benchmark components:**
```bash
cd ../k8s-benchmark
./apply.sh
```

@@ -56,7 +55,6 @@ kubectl get pods

**Benchmark Llama Stack (default):**
```bash
cd docs/source/distributions/k8s-benchmark/
./run-benchmark.sh
```
@ -14,7 +14,7 @@ import os
|
|||
import random
|
||||
import statistics
|
||||
import time
|
||||
from typing import Tuple
|
||||
|
||||
import aiohttp
|
||||
|
||||
|
||||
|
@ -56,18 +56,10 @@ class BenchmarkStats:
|
|||
total_time = self.end_time - self.start_time
|
||||
success_rate = (self.success_count / self.total_requests) * 100
|
||||
|
||||
print(f"\n{'='*60}")
|
||||
print(f"BENCHMARK RESULTS")
|
||||
print(f"{'='*60}")
|
||||
print(f"Total time: {total_time:.2f}s")
|
||||
print(f"Concurrent users: {self.concurrent_users}")
|
||||
print(f"Total requests: {self.total_requests}")
|
||||
print(f"Successful requests: {self.success_count}")
|
||||
print(f"Failed requests: {len(self.errors)}")
|
||||
print(f"Success rate: {success_rate:.1f}%")
|
||||
print(f"Requests per second: {self.success_count / total_time:.2f}")
|
||||
print(f"\n{'=' * 60}")
|
||||
print("BENCHMARK RESULTS")
|
||||
|
||||
print(f"\nResponse Time Statistics:")
|
||||
print("\nResponse Time Statistics:")
|
||||
print(f" Mean: {statistics.mean(self.response_times):.3f}s")
|
||||
print(f" Median: {statistics.median(self.response_times):.3f}s")
|
||||
print(f" Min: {min(self.response_times):.3f}s")
|
||||
|
@ -78,14 +70,14 @@ class BenchmarkStats:
|
|||
|
||||
percentiles = [50, 90, 95, 99]
|
||||
sorted_times = sorted(self.response_times)
|
||||
print(f"\nPercentiles:")
|
||||
print("\nPercentiles:")
|
||||
for p in percentiles:
|
||||
idx = int(len(sorted_times) * p / 100) - 1
|
||||
idx = max(0, min(idx, len(sorted_times) - 1))
|
||||
print(f" P{p}: {sorted_times[idx]:.3f}s")
|
||||
|
||||
if self.ttft_times:
|
||||
print(f"\nTime to First Token (TTFT) Statistics:")
|
||||
print("\nTime to First Token (TTFT) Statistics:")
|
||||
print(f" Mean: {statistics.mean(self.ttft_times):.3f}s")
|
||||
print(f" Median: {statistics.median(self.ttft_times):.3f}s")
|
||||
print(f" Min: {min(self.ttft_times):.3f}s")
|
||||
|
@ -95,26 +87,35 @@ class BenchmarkStats:
|
|||
print(f" Std Dev: {statistics.stdev(self.ttft_times):.3f}s")
|
||||
|
||||
sorted_ttft = sorted(self.ttft_times)
|
||||
print(f"\nTTFT Percentiles:")
|
||||
print("\nTTFT Percentiles:")
|
||||
for p in percentiles:
|
||||
idx = int(len(sorted_ttft) * p / 100) - 1
|
||||
idx = max(0, min(idx, len(sorted_ttft) - 1))
|
||||
print(f" P{p}: {sorted_ttft[idx]:.3f}s")
|
||||
|
||||
if self.chunks_received:
|
||||
print(f"\nStreaming Statistics:")
|
||||
print("\nStreaming Statistics:")
|
||||
print(f" Mean chunks per response: {statistics.mean(self.chunks_received):.1f}")
|
||||
print(f" Total chunks received: {sum(self.chunks_received)}")
|
||||
|
||||
print(f"{'=' * 60}")
|
||||
print(f"Total time: {total_time:.2f}s")
|
||||
print(f"Concurrent users: {self.concurrent_users}")
|
||||
print(f"Total requests: {self.total_requests}")
|
||||
print(f"Successful requests: {self.success_count}")
|
||||
print(f"Failed requests: {len(self.errors)}")
|
||||
print(f"Success rate: {success_rate:.1f}%")
|
||||
print(f"Requests per second: {self.success_count / total_time:.2f}")
|
||||
|
||||
if self.errors:
|
||||
print(f"\nErrors (showing first 5):")
|
||||
print("\nErrors (showing first 5):")
|
||||
for error in self.errors[:5]:
|
||||
print(f" {error}")
|
||||
|
||||
|
||||
class LlamaStackBenchmark:
|
||||
def __init__(self, base_url: str, model_id: str):
|
||||
self.base_url = base_url.rstrip('/')
|
||||
self.base_url = base_url.rstrip("/")
|
||||
self.model_id = model_id
|
||||
self.headers = {"Content-Type": "application/json"}
|
||||
self.test_messages = [
|
||||
|
@ -125,20 +126,14 @@ class LlamaStackBenchmark:
|
|||
[
|
||||
{"role": "user", "content": "What is machine learning?"},
|
||||
{"role": "assistant", "content": "Machine learning is a subset of AI..."},
|
||||
{"role": "user", "content": "Can you give me a practical example?"}
|
||||
]
|
||||
{"role": "user", "content": "Can you give me a practical example?"},
|
||||
],
|
||||
]
|
||||
|
||||
|
||||
async def make_async_streaming_request(self) -> Tuple[float, int, float | None, str | None]:
|
||||
async def make_async_streaming_request(self) -> tuple[float, int, float | None, str | None]:
|
||||
"""Make a single async streaming chat completion request."""
|
||||
messages = random.choice(self.test_messages)
|
||||
payload = {
|
||||
"model": self.model_id,
|
||||
"messages": messages,
|
||||
"stream": True,
|
||||
"max_tokens": 100
|
||||
}
|
||||
payload = {"model": self.model_id, "messages": messages, "stream": True, "max_tokens": 100}
|
||||
|
||||
start_time = time.time()
|
||||
chunks_received = 0
|
||||
|
@ -152,17 +147,17 @@ class LlamaStackBenchmark:
|
|||
f"{self.base_url}/chat/completions",
|
||||
headers=self.headers,
|
||||
json=payload,
|
||||
timeout=aiohttp.ClientTimeout(total=30)
|
||||
timeout=aiohttp.ClientTimeout(total=30),
|
||||
) as response:
|
||||
if response.status == 200:
|
||||
async for line in response.content:
|
||||
if line:
|
||||
line_str = line.decode('utf-8').strip()
|
||||
if line_str.startswith('data: '):
|
||||
line_str = line.decode("utf-8").strip()
|
||||
if line_str.startswith("data: "):
|
||||
chunks_received += 1
|
||||
if ttft is None:
|
||||
ttft = time.time() - start_time
|
||||
if line_str == 'data: [DONE]':
|
||||
if line_str == "data: [DONE]":
|
||||
break
|
||||
|
||||
if chunks_received == 0:
|
||||
|
@ -179,7 +174,6 @@ class LlamaStackBenchmark:
|
|||
response_time = time.time() - start_time
|
||||
return response_time, chunks_received, ttft, error
|
||||
|
||||
|
||||
async def run_benchmark(self, duration: int, concurrent_users: int) -> BenchmarkStats:
|
||||
"""Run benchmark using async requests for specified duration."""
|
||||
stats = BenchmarkStats()
|
||||
|
@ -191,7 +185,7 @@ class LlamaStackBenchmark:
|
|||
print(f"Model: {self.model_id}")
|
||||
|
||||
connector = aiohttp.TCPConnector(limit=concurrent_users)
|
||||
async with aiohttp.ClientSession(connector=connector) as session:
|
||||
async with aiohttp.ClientSession(connector=connector):
|
||||
|
||||
async def worker(worker_id: int):
|
||||
"""Worker that sends requests sequentially until canceled."""
|
||||
|
@ -215,7 +209,9 @@ class LlamaStackBenchmark:
|
|||
await asyncio.sleep(1) # Report every second
|
||||
if time.time() >= last_report_time + 10: # Report every 10 seconds
|
||||
elapsed = time.time() - stats.start_time
|
||||
print(f"Completed: {stats.total_requests} requests in {elapsed:.1f}s")
|
||||
print(
|
||||
f"Completed: {stats.total_requests} requests in {elapsed:.1f}s, RPS: {stats.total_requests / elapsed:.1f}"
|
||||
)
|
||||
last_report_time = time.time()
|
||||
except asyncio.CancelledError:
|
||||
break
|
||||
|
@ -240,14 +236,16 @@ class LlamaStackBenchmark:
|
|||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Llama Stack Benchmark Tool")
|
||||
parser.add_argument("--base-url", default=os.getenv("BENCHMARK_BASE_URL", "http://localhost:8000/v1/openai/v1"),
|
||||
help="Base URL for the API (default: http://localhost:8000/v1/openai/v1)")
|
||||
parser.add_argument("--model", default=os.getenv("INFERENCE_MODEL", "test-model"),
|
||||
help="Model ID to use for requests")
|
||||
parser.add_argument("--duration", type=int, default=60,
|
||||
help="Duration in seconds to run benchmark (default: 60)")
|
||||
parser.add_argument("--concurrent", type=int, default=10,
|
||||
help="Number of concurrent users (default: 10)")
|
||||
parser.add_argument(
|
||||
"--base-url",
|
||||
default=os.getenv("BENCHMARK_BASE_URL", "http://localhost:8000/v1/openai/v1"),
|
||||
help="Base URL for the API (default: http://localhost:8000/v1/openai/v1)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--model", default=os.getenv("INFERENCE_MODEL", "test-model"), help="Model ID to use for requests"
|
||||
)
|
||||
parser.add_argument("--duration", type=int, default=60, help="Duration in seconds to run benchmark (default: 60)")
|
||||
parser.add_argument("--concurrent", type=int, default=10, help="Number of concurrent users (default: 10)")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
|
@ -11,16 +11,18 @@ OpenAI-compatible mock server that returns:
|
|||
- Valid OpenAI-formatted chat completion responses with dynamic content
|
||||
"""
|
||||
|
||||
from flask import Flask, request, jsonify, Response
|
||||
import time
|
||||
import random
|
||||
import uuid
|
||||
import json
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
import uuid
|
||||
|
||||
from flask import Flask, Response, jsonify, request
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
|
||||
# Models from environment variables
|
||||
def get_models():
|
||||
models_str = os.getenv("MOCK_MODELS", "meta-llama/Llama-3.2-3B-Instruct")
|
||||
|
@ -29,40 +31,72 @@ def get_models():
|
|||
return {
|
||||
"object": "list",
|
||||
"data": [
|
||||
{
|
||||
"id": model_id,
|
||||
"object": "model",
|
||||
"created": 1234567890,
|
||||
"owned_by": "vllm"
|
||||
}
|
||||
for model_id in model_ids
|
||||
]
|
||||
{"id": model_id, "object": "model", "created": 1234567890, "owned_by": "vllm"} for model_id in model_ids
|
||||
],
|
||||
}
|
||||
|
||||
|
||||
def generate_random_text(length=50):
|
||||
"""Generate random but coherent text for responses."""
|
||||
words = [
|
||||
"Hello", "there", "I'm", "an", "AI", "assistant", "ready", "to", "help", "you",
|
||||
"with", "your", "questions", "and", "tasks", "today", "Let", "me","know", "what",
|
||||
"you'd", "like", "to", "discuss", "or", "explore", "together", "I", "can", "assist",
|
||||
"with", "various", "topics", "including", "coding", "writing", "analysis", "and", "more"
|
||||
"Hello",
|
||||
"there",
|
||||
"I'm",
|
||||
"an",
|
||||
"AI",
|
||||
"assistant",
|
||||
"ready",
|
||||
"to",
|
||||
"help",
|
||||
"you",
|
||||
"with",
|
||||
"your",
|
||||
"questions",
|
||||
"and",
|
||||
"tasks",
|
||||
"today",
|
||||
"Let",
|
||||
"me",
|
||||
"know",
|
||||
"what",
|
||||
"you'd",
|
||||
"like",
|
||||
"to",
|
||||
"discuss",
|
||||
"or",
|
||||
"explore",
|
||||
"together",
|
||||
"I",
|
||||
"can",
|
||||
"assist",
|
||||
"with",
|
||||
"various",
|
||||
"topics",
|
||||
"including",
|
||||
"coding",
|
||||
"writing",
|
||||
"analysis",
|
||||
"and",
|
||||
"more",
|
||||
]
|
||||
return " ".join(random.choices(words, k=length))
|
||||
|
||||
@app.route('/v1/models', methods=['GET'])
|
||||
|
||||
@app.route("/v1/models", methods=["GET"])
|
||||
def list_models():
|
||||
models = get_models()
|
||||
print(f"[MOCK] Returning models: {[m['id'] for m in models['data']]}")
|
||||
return jsonify(models)
|
||||
|
||||
@app.route('/v1/chat/completions', methods=['POST'])
|
||||
|
||||
@app.route("/v1/chat/completions", methods=["POST"])
|
||||
def chat_completions():
|
||||
"""Return OpenAI-formatted chat completion responses."""
|
||||
data = request.get_json()
|
||||
default_model = get_models()['data'][0]['id']
|
||||
model = data.get('model', default_model)
|
||||
messages = data.get('messages', [])
|
||||
stream = data.get('stream', False)
|
||||
default_model = get_models()["data"][0]["id"]
|
||||
model = data.get("model", default_model)
|
||||
messages = data.get("messages", [])
|
||||
stream = data.get("stream", False)
|
||||
|
||||
print(f"[MOCK] Chat completion request - model: {model}, stream: {stream}")
|
||||
|
||||
|
@ -71,11 +105,12 @@ def chat_completions():
|
|||
else:
|
||||
return handle_non_streaming_completion(model, messages)
|
||||
|
||||
|
||||
def handle_non_streaming_completion(model, messages):
|
||||
response_text = generate_random_text(random.randint(20, 80))
|
||||
|
||||
# Calculate realistic token counts
|
||||
prompt_tokens = sum(len(str(msg.get('content', '')).split()) for msg in messages)
|
||||
prompt_tokens = sum(len(str(msg.get("content", "")).split()) for msg in messages)
|
||||
completion_tokens = len(response_text.split())
|
||||
|
||||
response = {
|
||||
|
@ -83,25 +118,17 @@ def handle_non_streaming_completion(model, messages):
|
|||
"object": "chat.completion",
|
||||
"created": int(time.time()),
|
||||
"model": model,
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": response_text
|
||||
},
|
||||
"finish_reason": "stop"
|
||||
}
|
||||
],
|
||||
"choices": [{"index": 0, "message": {"role": "assistant", "content": response_text}, "finish_reason": "stop"}],
|
||||
"usage": {
|
||||
"prompt_tokens": prompt_tokens,
|
||||
"completion_tokens": completion_tokens,
|
||||
"total_tokens": prompt_tokens + completion_tokens
|
||||
}
|
||||
"total_tokens": prompt_tokens + completion_tokens,
|
||||
},
|
||||
}
|
||||
|
||||
return jsonify(response)
|
||||
|
||||
|
||||
def handle_streaming_completion(model, messages):
|
||||
def generate_stream():
|
||||
# Generate response text
|
||||
|
@ -114,12 +141,7 @@ def handle_streaming_completion(model, messages):
|
|||
"object": "chat.completion.chunk",
|
||||
"created": int(time.time()),
|
||||
"model": model,
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"delta": {"role": "assistant", "content": ""}
|
||||
}
|
||||
]
|
||||
"choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}}],
|
||||
}
|
||||
yield f"data: {json.dumps(initial_chunk)}\n\n"
|
||||
|
||||
|
@ -130,12 +152,7 @@ def handle_streaming_completion(model, messages):
|
|||
"object": "chat.completion.chunk",
|
||||
"created": int(time.time()),
|
||||
"model": model,
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"delta": {"content": f"{word} " if i < len(words) - 1 else word}
|
||||
}
|
||||
]
|
||||
"choices": [{"index": 0, "delta": {"content": f"{word} " if i < len(words) - 1 else word}}],
|
||||
}
|
||||
yield f"data: {json.dumps(chunk)}\n\n"
|
||||
# Configurable delay to simulate realistic streaming
|
||||
|
@ -148,35 +165,30 @@ def handle_streaming_completion(model, messages):
|
|||
"object": "chat.completion.chunk",
|
||||
"created": int(time.time()),
|
||||
"model": model,
|
||||
"choices": [
|
||||
{
|
||||
"index": 0,
|
||||
"delta": {"content": ""},
|
||||
"finish_reason": "stop"
|
||||
}
|
||||
]
|
||||
"choices": [{"index": 0, "delta": {"content": ""}, "finish_reason": "stop"}],
|
||||
}
|
||||
yield f"data: {json.dumps(final_chunk)}\n\n"
|
||||
yield "data: [DONE]\n\n"
|
||||
|
||||
return Response(
|
||||
generate_stream(),
|
||||
mimetype='text/event-stream',
|
||||
mimetype="text/event-stream",
|
||||
headers={
|
||||
'Cache-Control': 'no-cache',
|
||||
'Connection': 'keep-alive',
|
||||
'Access-Control-Allow-Origin': '*',
|
||||
}
|
||||
"Cache-Control": "no-cache",
|
||||
"Connection": "keep-alive",
|
||||
"Access-Control-Allow-Origin": "*",
|
||||
},
|
||||
)
|
||||
|
||||
@app.route('/health', methods=['GET'])
|
||||
|
||||
@app.route("/health", methods=["GET"])
|
||||
def health():
|
||||
return jsonify({"status": "healthy", "type": "openai-mock"})
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(description='OpenAI-compatible mock server')
|
||||
parser.add_argument('--port', type=int, default=8081,
|
||||
help='Port to run the server on (default: 8081)')
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(description="OpenAI-compatible mock server")
|
||||
parser.add_argument("--port", type=int, default=8081, help="Port to run the server on (default: 8081)")
|
||||
args = parser.parse_args()
|
||||
|
||||
port = args.port
|
||||
|
@ -187,4 +199,4 @@ if __name__ == '__main__':
|
|||
print("- OpenAI-formatted chat/completion responses with dynamic content")
|
||||
print("- Streaming support with valid SSE format")
|
||||
print(f"- Listening on: http://0.0.0.0:{port}")
|
||||
app.run(host='0.0.0.0', port=port, debug=False)
|
||||
app.run(host="0.0.0.0", port=port, debug=False)
|
|
@@ -2,6 +2,7 @@ version: '2'
image_name: kubernetes-benchmark-demo
apis:
- agents
- files
- inference
- files
- safety

@@ -20,6 +21,14 @@ providers:
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
config: {}
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
vector_io:
- provider_id: ${env.ENABLE_CHROMADB:+chromadb}
provider_type: remote::chromadb
docs/_static/css/my_theme.css (vendored): 101 lines changed
|
@ -1,5 +1,106 @@
|
|||
@import url("theme.css");
|
||||
|
||||
/* Horizontal Navigation Bar */
|
||||
.horizontal-nav {
|
||||
background-color: #ffffff;
|
||||
border-bottom: 1px solid #e5e5e5;
|
||||
padding: 0;
|
||||
position: fixed;
|
||||
top: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
z-index: 1050;
|
||||
height: 50px;
|
||||
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
||||
}
|
||||
|
||||
[data-theme="dark"] .horizontal-nav {
|
||||
background-color: #1a1a1a;
|
||||
border-bottom: 1px solid #333;
|
||||
}
|
||||
|
||||
.horizontal-nav .nav-container {
|
||||
max-width: 1200px;
|
||||
margin: 0 auto;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
padding: 0 20px;
|
||||
height: 100%;
|
||||
}
|
||||
|
||||
.horizontal-nav .nav-brand {
|
||||
font-size: 18px;
|
||||
font-weight: 600;
|
||||
color: #333;
|
||||
text-decoration: none;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .horizontal-nav .nav-brand {
|
||||
color: #fff;
|
||||
}
|
||||
|
||||
.horizontal-nav .nav-links {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 30px;
|
||||
list-style: none;
|
||||
margin: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
.horizontal-nav .nav-links a {
|
||||
color: #666;
|
||||
text-decoration: none;
|
||||
font-size: 14px;
|
||||
font-weight: 500;
|
||||
padding: 8px 12px;
|
||||
border-radius: 6px;
|
||||
transition: all 0.2s ease;
|
||||
}
|
||||
|
||||
.horizontal-nav .nav-links a:hover,
|
||||
.horizontal-nav .nav-links a.active {
|
||||
color: #333;
|
||||
background-color: #f5f5f5;
|
||||
}
|
||||
|
||||
.horizontal-nav .nav-links a.active {
|
||||
font-weight: 600;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .horizontal-nav .nav-links a {
|
||||
color: #ccc;
|
||||
}
|
||||
|
||||
[data-theme="dark"] .horizontal-nav .nav-links a:hover,
|
||||
[data-theme="dark"] .horizontal-nav .nav-links a.active {
|
||||
color: #fff;
|
||||
background-color: #333;
|
||||
}
|
||||
|
||||
.horizontal-nav .nav-links .github-link {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
}
|
||||
|
||||
.horizontal-nav .nav-links .github-icon {
|
||||
width: 16px;
|
||||
height: 16px;
|
||||
fill: currentColor;
|
||||
}
|
||||
|
||||
/* Adjust main content to account for fixed nav */
|
||||
.wy-nav-side {
|
||||
top: 50px;
|
||||
height: calc(100vh - 50px);
|
||||
}
|
||||
|
||||
.wy-nav-content-wrap {
|
||||
margin-top: 50px;
|
||||
}
|
||||
|
||||
.wy-nav-content {
|
||||
max-width: 90%;
|
||||
}
|
||||
|
|
docs/_static/js/horizontal_nav.js (vendored, new file): 44 lines
|
@ -0,0 +1,44 @@
|
|||
// Horizontal Navigation Bar for Llama Stack Documentation
|
||||
document.addEventListener('DOMContentLoaded', function() {
|
||||
// Create the horizontal navigation HTML
|
||||
const navHTML = `
|
||||
<nav class="horizontal-nav">
|
||||
<div class="nav-container">
|
||||
<a href="/" class="nav-brand">Llama Stack</a>
|
||||
<ul class="nav-links">
|
||||
<li><a href="/">Docs</a></li>
|
||||
<li><a href="/references/api_reference/">API Reference</a></li>
|
||||
<li><a href="https://github.com/meta-llama/llama-stack" target="_blank" class="github-link">
|
||||
<svg class="github-icon" viewBox="0 0 16 16" aria-hidden="true">
|
||||
<path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.013 8.013 0 0016 8c0-4.42-3.58-8-8-8z"/>
|
||||
</svg>
|
||||
GitHub
|
||||
</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</nav>
|
||||
`;
|
||||
|
||||
// Insert the navigation at the beginning of the body
|
||||
document.body.insertAdjacentHTML('afterbegin', navHTML);
|
||||
|
||||
// Update navigation links based on current page
|
||||
updateActiveNav();
|
||||
});
|
||||
|
||||
function updateActiveNav() {
|
||||
const currentPath = window.location.pathname;
|
||||
const navLinks = document.querySelectorAll('.horizontal-nav .nav-links a');
|
||||
|
||||
navLinks.forEach(link => {
|
||||
// Remove any existing active classes
|
||||
link.classList.remove('active');
|
||||
|
||||
// Add active class based on current path
|
||||
if (currentPath === '/' && link.getAttribute('href') === '/') {
|
||||
link.classList.add('active');
|
||||
} else if (currentPath.includes('/references/api_reference/') && link.getAttribute('href').includes('api_reference')) {
|
||||
link.classList.add('active');
|
||||
}
|
||||
});
|
||||
}
|
docs/notebooks/langchain/Llama_Stack_LangChain.ipynb (new file): 701 lines
|
@ -0,0 +1,701 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "1ztegmwm4sp",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## LlamaStack + LangChain Integration Tutorial\n",
|
||||
"\n",
|
||||
"This notebook demonstrates how to integrate **LlamaStack** with **LangChain** to build a complete RAG (Retrieval-Augmented Generation) system.\n",
|
||||
"\n",
|
||||
"### Overview\n",
|
||||
"\n",
|
||||
"- **LlamaStack**: Provides the infrastructure for running LLMs and Open AI Compatible Vector Stores\n",
|
||||
"- **LangChain**: Provides the framework for chaining operations and prompt templates\n",
|
||||
"- **Integration**: Uses LlamaStack's OpenAI-compatible API with LangChain\n",
|
||||
"\n",
|
||||
"### What You'll See\n",
|
||||
"\n",
|
||||
"1. Setting up LlamaStack server with Fireworks AI provider\n",
|
||||
"2. Creating and Querying Vector Stores\n",
|
||||
"3. Building RAG chains with LangChain + LLAMAStack\n",
|
||||
"4. Querying the chain for relevant information\n",
|
||||
"\n",
|
||||
"### Prerequisites\n",
|
||||
"\n",
|
||||
"- Fireworks API key\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"### 1. Installation and Setup"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "2ktr5ls2cas",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Install Required Dependencies\n",
|
||||
"\n",
|
||||
"First, we install all the necessary packages for LangChain and FastAPI integration."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "5b6a6a17-b931-4bea-8273-0d6e5563637a",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Requirement already satisfied: uv in /Users/swapna942/miniconda3/lib/python3.12/site-packages (0.7.20)\n",
|
||||
"\u001b[2mUsing Python 3.12.11 environment at: /Users/swapna942/miniconda3\u001b[0m\n",
|
||||
"\u001b[2mAudited \u001b[1m7 packages\u001b[0m \u001b[2min 42ms\u001b[0m\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!pip install uv\n",
|
||||
"!uv pip install fastapi uvicorn \"langchain>=0.2\" langchain-openai \\\n",
|
||||
" langchain-community langchain-text-splitters \\\n",
|
||||
" faiss-cpu"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "wmt9jvqzh7n",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 2. LlamaStack Server Setup\n",
|
||||
"\n",
|
||||
"#### Build and Start LlamaStack Server\n",
|
||||
"\n",
|
||||
"This section sets up the LlamaStack server with:\n",
|
||||
"- **Fireworks AI** as the inference provider\n",
|
||||
"- **Sentence Transformers** for embeddings\n",
|
||||
"\n",
|
||||
"The server runs on `localhost:8321` and provides OpenAI-compatible endpoints."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "dd2dacf3-ec8b-4cc7-8ff4-b5b6ea4a6e9e",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import subprocess\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"# Remove UV_SYSTEM_PYTHON to ensure uv creates a proper virtual environment\n",
|
||||
"# instead of trying to use system Python globally, which could cause permission issues\n",
|
||||
"# and package conflicts with the system's Python installation\n",
|
||||
"if \"UV_SYSTEM_PYTHON\" in os.environ:\n",
|
||||
" del os.environ[\"UV_SYSTEM_PYTHON\"]\n",
|
||||
"\n",
|
||||
"def run_llama_stack_server_background():\n",
|
||||
" \"\"\"Build and run LlamaStack server in one step using --run flag\"\"\"\n",
|
||||
" log_file = open(\"llama_stack_server.log\", \"w\")\n",
|
||||
" process = subprocess.Popen(\n",
|
||||
" \"uv run --with llama-stack llama stack build --distro starter --image-type venv --run\",\n",
|
||||
" shell=True,\n",
|
||||
" stdout=log_file,\n",
|
||||
" stderr=log_file,\n",
|
||||
" text=True,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" print(f\"Building and starting Llama Stack server with PID: {process.pid}\")\n",
|
||||
" return process\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def wait_for_server_to_start():\n",
|
||||
" import requests\n",
|
||||
" from requests.exceptions import ConnectionError\n",
|
||||
"\n",
|
||||
" url = \"http://0.0.0.0:8321/v1/health\"\n",
|
||||
" max_retries = 30\n",
|
||||
" retry_interval = 1\n",
|
||||
"\n",
|
||||
" print(\"Waiting for server to start\", end=\"\")\n",
|
||||
" for _ in range(max_retries):\n",
|
||||
" try:\n",
|
||||
" response = requests.get(url)\n",
|
||||
" if response.status_code == 200:\n",
|
||||
" print(\"\\nServer is ready!\")\n",
|
||||
" return True\n",
|
||||
" except ConnectionError:\n",
|
||||
" print(\".\", end=\"\", flush=True)\n",
|
||||
" time.sleep(retry_interval)\n",
|
||||
"\n",
|
||||
" print(\"\\nServer failed to start after\", max_retries * retry_interval, \"seconds\")\n",
|
||||
" return False\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def kill_llama_stack_server():\n",
|
||||
" # Kill any existing llama stack server processes using pkill command\n",
|
||||
" os.system(\"pkill -f llama_stack.core.server.server\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "28bd8dbd-4576-4e76-813f-21ab94db44a2",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Building and starting Llama Stack server with PID: 19747\n",
|
||||
"Waiting for server to start....\n",
|
||||
"Server is ready!\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"server_process = run_llama_stack_server_background()\n",
|
||||
"assert wait_for_server_to_start()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "gr9cdcg4r7n",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Install LlamaStack Client\n",
|
||||
"\n",
|
||||
"Install the client library to interact with the LlamaStack server."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "487d2dbc-d071-400e-b4f0-dcee58f8dc95",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\u001b[2mUsing Python 3.12.11 environment at: /Users/swapna942/miniconda3\u001b[0m\n",
|
||||
"\u001b[2mAudited \u001b[1m1 package\u001b[0m \u001b[2min 27ms\u001b[0m\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!uv pip install llama_stack_client"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0j5hag7l9x89",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 3. Initialize LlamaStack Client\n",
|
||||
"\n",
|
||||
"Create a client connection to the LlamaStack server with API keys for different providers:\n",
|
||||
"\n",
|
||||
"- **Fireworks API Key**: For Fireworks models\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "ab4eff97-4565-4c73-b1b3-0020a4c7e2a5",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from llama_stack_client import LlamaStackClient\n",
|
||||
"\n",
|
||||
"client = LlamaStackClient(\n",
|
||||
" base_url=\"http://0.0.0.0:8321\",\n",
|
||||
" provider_data={\"fireworks_api_key\": \"***\"},\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "vwhexjy1e8o",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Explore Available Models and Safety Features\n",
|
||||
"\n",
|
||||
"Check what models and safety shields are available through your LlamaStack instance."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "880443ef-ac3c-48b1-a80a-7dab5b25ac61",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/models \"HTTP/1.1 200 OK\"\n",
|
||||
"INFO:httpx:HTTP Request: GET http://0.0.0.0:8321/v1/shields \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Available Fireworks models:\n",
|
||||
"- fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct\n",
|
||||
"- fireworks/accounts/fireworks/models/llama-v3p1-70b-instruct\n",
|
||||
"- fireworks/accounts/fireworks/models/llama-v3p1-405b-instruct\n",
|
||||
"- fireworks/accounts/fireworks/models/llama-v3p2-3b-instruct\n",
|
||||
"- fireworks/accounts/fireworks/models/llama-v3p2-11b-vision-instruct\n",
|
||||
"- fireworks/accounts/fireworks/models/llama-v3p2-90b-vision-instruct\n",
|
||||
"- fireworks/accounts/fireworks/models/llama-v3p3-70b-instruct\n",
|
||||
"- fireworks/accounts/fireworks/models/llama4-scout-instruct-basic\n",
|
||||
"- fireworks/accounts/fireworks/models/llama4-maverick-instruct-basic\n",
|
||||
"- fireworks/nomic-ai/nomic-embed-text-v1.5\n",
|
||||
"- fireworks/accounts/fireworks/models/llama-guard-3-8b\n",
|
||||
"- fireworks/accounts/fireworks/models/llama-guard-3-11b-vision\n",
|
||||
"----\n",
|
||||
"Available shields (safety models):\n",
|
||||
"code-scanner\n",
|
||||
"llama-guard\n",
|
||||
"nemo-guardrail\n",
|
||||
"----\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\"Available Fireworks models:\")\n",
|
||||
"for m in client.models.list():\n",
|
||||
" if m.identifier.startswith(\"fireworks/\"):\n",
|
||||
" print(f\"- {m.identifier}\")\n",
|
||||
"\n",
|
||||
"print(\"----\")\n",
|
||||
"print(\"Available shields (safety models):\")\n",
|
||||
"for s in client.shields.list():\n",
|
||||
" print(s.identifier)\n",
|
||||
"print(\"----\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "gojp7at31ht",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 4. Vector Store Setup\n",
|
||||
"\n",
|
||||
"#### Create a Vector Store with File Upload\n",
|
||||
"\n",
|
||||
"Create a vector store using the OpenAI-compatible vector stores API:\n",
|
||||
"\n",
|
||||
"- **Vector Store**: OpenAI-compatible vector store for document storage\n",
|
||||
"- **File Upload**: Automatic chunking and embedding of uploaded files \n",
|
||||
"- **Embedding Model**: Sentence Transformers model for text embeddings\n",
|
||||
"- **Dimensions**: 384-dimensional embeddings"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "be2c2899-ea53-4e5f-b6b8-ed425f5d6572",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/files \"HTTP/1.1 200 OK\"\n",
|
||||
"INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/files \"HTTP/1.1 200 OK\"\n",
|
||||
"INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/files \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"File(id='file-54652c95c56c4c34918a97d7ff8a4320', bytes=41, created_at=1757442621, expires_at=1788978621, filename='shipping_policy.txt', object='file', purpose='assistants')\n",
|
||||
"File(id='file-fb1227c1d1854da1bd774d21e5b7e41c', bytes=48, created_at=1757442621, expires_at=1788978621, filename='returns_policy.txt', object='file', purpose='assistants')\n",
|
||||
"File(id='file-673f874852fe42798675a13d06a256e2', bytes=45, created_at=1757442621, expires_at=1788978621, filename='support.txt', object='file', purpose='assistants')\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/vector_stores \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from io import BytesIO\n",
|
||||
"\n",
|
||||
"docs = [\n",
|
||||
" (\"Acme ships globally in 3-5 business days.\", {\"title\": \"Shipping Policy\"}),\n",
|
||||
" (\"Returns are accepted within 30 days of purchase.\", {\"title\": \"Returns Policy\"}),\n",
|
||||
" (\"Support is available 24/7 via chat and email.\", {\"title\": \"Support\"}),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"file_ids = []\n",
|
||||
"for content, metadata in docs:\n",
|
||||
" with BytesIO(content.encode()) as file_buffer:\n",
|
||||
" file_buffer.name = f\"{metadata['title'].replace(' ', '_').lower()}.txt\"\n",
|
||||
" create_file_response = client.files.create(file=file_buffer, purpose=\"assistants\")\n",
|
||||
" print(create_file_response)\n",
|
||||
" file_ids.append(create_file_response.id)\n",
|
||||
"\n",
|
||||
"# Create vector store with files\n",
|
||||
"vector_store = client.vector_stores.create(\n",
|
||||
" name=\"acme_docs\",\n",
|
||||
" file_ids=file_ids,\n",
|
||||
" embedding_model=\"sentence-transformers/all-MiniLM-L6-v2\",\n",
|
||||
" embedding_dimension=384,\n",
|
||||
" provider_id=\"faiss\"\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "9061tmi1zpq",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Test Vector Store Search\n",
|
||||
"\n",
|
||||
"Query the vector store. This performs semantic search to find relevant documents based on the query."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "ba9d1901-bd5e-4216-b3e6-19dc74551cc6",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/vector_stores/vs_708c060b-45da-423e-8354-68529b4fd1a6/search \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Acme ships globally in 3-5 business days.\n",
|
||||
"Returns are accepted within 30 days of purchase.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"search_response = client.vector_stores.search(\n",
|
||||
" vector_store_id=vector_store.id,\n",
|
||||
" query=\"How long does shipping take?\",\n",
|
||||
" max_num_results=2\n",
|
||||
")\n",
|
||||
"for result in search_response.data:\n",
|
||||
" content = result.content[0].text\n",
|
||||
" print(content)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "usne6mbspms",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 5. LangChain Integration\n",
|
||||
"\n",
|
||||
"#### Configure LangChain with LlamaStack\n",
|
||||
"\n",
|
||||
"Set up LangChain to use LlamaStack's OpenAI-compatible API:\n",
|
||||
"\n",
|
||||
"- **Base URL**: Points to LlamaStack's OpenAI endpoint\n",
|
||||
"- **Headers**: Include Fireworks API key for model access\n",
|
||||
"- **Model**: Use Meta Llama v3p1 8b instruct model for inference"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"id": "c378bd10-09c2-417c-bdfc-1e0a2dd19084",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from langchain_openai import ChatOpenAI\n",
|
||||
"\n",
|
||||
"# Point LangChain to Llamastack Server\n",
|
||||
"llm = ChatOpenAI(\n",
|
||||
" base_url=\"http://0.0.0.0:8321/v1/openai/v1\",\n",
|
||||
" api_key=\"dummy\",\n",
|
||||
" model=\"fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct\",\n",
|
||||
" default_headers={\"X-LlamaStack-Provider-Data\": '{\"fireworks_api_key\": \"***\"}'},\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "5a4ddpcuk3l",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Test LLM Connection\n",
|
||||
"\n",
|
||||
"Verify that LangChain can successfully communicate with the LlamaStack server."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"id": "f88ffb5a-657b-4916-9375-c6ddc156c25e",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"A llama's gentle eyes shine bright,\\nIn the Andes, it roams through morning light.\", additional_kwargs={'refusal': None}, response_metadata={'token_usage': None, 'model_name': 'fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct', 'system_fingerprint': None, 'id': 'chatcmpl-602b5967-82a3-476b-9cd2-7d3b29b76ee8', 'service_tier': None, 'finish_reason': 'stop', 'logprobs': None}, id='run--0933c465-ff4d-4a7b-b7fb-fd97dd8244f3-0')"
|
||||
]
|
||||
},
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Test llm with simple message\n",
|
||||
"messages = [\n",
|
||||
" {\"role\": \"system\", \"content\": \"You are a friendly assistant.\"},\n",
|
||||
" {\"role\": \"user\", \"content\": \"Write a two-sentence poem about llama.\"},\n",
|
||||
"]\n",
|
||||
"llm.invoke(messages)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0xh0jg6a0l4a",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 6. Building the RAG Chain\n",
|
||||
"\n",
|
||||
"#### Create a Complete RAG Pipeline\n",
|
||||
"\n",
|
||||
"Build a LangChain pipeline that combines:\n",
|
||||
"\n",
|
||||
"1. **Vector Search**: Query LlamaStack's Open AI compatible Vector Store\n",
|
||||
"2. **Context Assembly**: Format retrieved documents\n",
|
||||
"3. **Prompt Template**: Structure the input for the LLM\n",
|
||||
"4. **LLM Generation**: Generate answers using context\n",
|
||||
"5. **Output Parsing**: Extract the final response\n",
|
||||
"\n",
|
||||
"**Chain Flow**: `Query → Vector Search → Context + Question → LLM → Response`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"id": "9684427d-dcc7-4544-9af5-8b110d014c42",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# LangChain for prompt template and chaining + LLAMA Stack Client Vector DB and LLM chat completion\n",
|
||||
"from langchain_core.output_parsers import StrOutputParser\n",
|
||||
"from langchain_core.prompts import ChatPromptTemplate\n",
|
||||
"from langchain_core.runnables import RunnableLambda, RunnablePassthrough\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def join_docs(docs):\n",
|
||||
" return \"\\n\\n\".join([f\"[{d.filename}] {d.content[0].text}\" for d in docs.data])\n",
|
||||
"\n",
|
||||
"PROMPT = ChatPromptTemplate.from_messages(\n",
|
||||
" [\n",
|
||||
" (\"system\", \"You are a helpful assistant. Use the following context to answer.\"),\n",
|
||||
" (\"user\", \"Question: {question}\\n\\nContext:\\n{context}\"),\n",
|
||||
" ]\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"vector_step = RunnableLambda(\n",
|
||||
" lambda x: client.vector_stores.search(\n",
|
||||
" vector_store_id=vector_store.id,\n",
|
||||
" query=x,\n",
|
||||
" max_num_results=2\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"chain = (\n",
|
||||
" {\"context\": vector_step | RunnableLambda(join_docs), \"question\": RunnablePassthrough()}\n",
|
||||
" | PROMPT\n",
|
||||
" | llm\n",
|
||||
" | StrOutputParser()\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "0onu6rhphlra",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 7. Testing the RAG System\n",
|
||||
"\n",
|
||||
"#### Example 1: Shipping Query"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"id": "03322188-9509-446a-a4a8-ce3bb83ec87c",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/vector_stores/vs_708c060b-45da-423e-8354-68529b4fd1a6/search \"HTTP/1.1 200 OK\"\n",
|
||||
"INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"❓ How long does shipping take?\n",
|
||||
"💡 Acme ships globally in 3-5 business days. This means that shipping typically takes between 3 to 5 working days from the date of dispatch or order fulfillment.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"How long does shipping take?\"\n",
|
||||
"response = chain.invoke(query)\n",
|
||||
"print(\"❓\", query)\n",
|
||||
"print(\"💡\", response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "b7krhqj88ku",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"#### Example 2: Returns Policy Query"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"id": "61995550-bb0b-46a8-a5d0-023207475d60",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/vector_stores/vs_708c060b-45da-423e-8354-68529b4fd1a6/search \"HTTP/1.1 200 OK\"\n",
|
||||
"INFO:httpx:HTTP Request: POST http://0.0.0.0:8321/v1/openai/v1/chat/completions \"HTTP/1.1 200 OK\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"❓ Can I return a product after 40 days?\n",
|
||||
"💡 Based on the provided context, you cannot return a product after 40 days. The return window is limited to 30 days from the date of purchase.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"query = \"Can I return a product after 40 days?\"\n",
|
||||
"response = chain.invoke(query)\n",
|
||||
"print(\"❓\", query)\n",
|
||||
"print(\"💡\", response)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "h4w24fadvjs",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"We have successfully built a RAG system that combines:\n",
|
||||
"\n",
|
||||
"- **LlamaStack** for infrastructure (LLM serving + Vector Store)\n",
|
||||
"- **LangChain** for orchestration (prompts + chains)\n",
|
||||
"- **Fireworks** for high-quality language models\n",
|
||||
"\n",
|
||||
"### Key Benefits\n",
|
||||
"\n",
|
||||
"1. **Unified Infrastructure**: Single server for LLMs and Vector Store\n",
|
||||
"2. **OpenAI Compatibility**: Easy integration with existing LangChain code\n",
|
||||
"3. **Multi-Provider Support**: Switch between different LLM providers\n",
|
||||
"4. **Production Ready**: Built-in safety shields and monitoring\n",
|
||||
"\n",
|
||||
"### Next Steps\n",
|
||||
"\n",
|
||||
"- Add more sophisticated document processing\n",
|
||||
"- Implement conversation memory\n",
|
||||
"- Add safety filtering and monitoring\n",
|
||||
"- Scale to larger document collections\n",
|
||||
"- Integrate with web frameworks like FastAPI or Streamlit\n",
|
||||
"\n",
|
||||
"---\n",
|
||||
"\n",
|
||||
"##### 🔧 Cleanup\n",
|
||||
"\n",
|
||||
"Don't forget to stop the LlamaStack server when you're done:\n",
|
||||
"\n",
|
||||
"```python\n",
|
||||
"kill_llama_stack_server()\n",
|
||||
"```"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"id": "15647c46-22ce-4698-af3f-8161329d8e3a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"kill_llama_stack_server()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
|
@@ -93,10 +93,31 @@ chunks_response = client.vector_io.query(

### Using the RAG Tool

> **⚠️ DEPRECATION NOTICE**: The RAG Tool is being deprecated in favor of directly using the OpenAI-compatible Search
> API. We recommend migrating to the OpenAI APIs for better compatibility and future support.

A better way to ingest documents is to use the RAG Tool. This tool allows you to ingest documents from URLs, files, etc.
and automatically chunks them into smaller pieces. More examples for how to format a RAGDocument can be found in the
[appendix](#more-ragdocument-examples).

#### OpenAI API Integration & Migration

The RAG tool has been updated to use OpenAI-compatible APIs. This provides several benefits:

- **Files API Integration**: Documents are now uploaded using OpenAI's file upload endpoints
- **Vector Stores API**: Vector storage operations use OpenAI's vector store format with configurable chunking strategies
- **Error Resilience**: When processing multiple documents, individual failures are logged but don't crash the operation. Failed documents are skipped while successful ones continue processing.

**Migration Path:**
We recommend migrating to the OpenAI-compatible Search API for:
1. **Better OpenAI Ecosystem Integration**: Direct compatibility with OpenAI tools and workflows including the Responses API
2. **Future-Proof**: Continued support and feature development
3. **Full OpenAI Compatibility**: Vector Stores, Files, and Search APIs are fully compatible with OpenAI's Responses API

The OpenAI APIs are used under the hood, so you can continue to use your existing RAG Tool code with minimal changes.
However, we recommend updating your code to use the new OpenAI-compatible APIs for better long-term support. If any
documents fail to process, they will be logged in the response but will not cause the entire operation to fail.

```python
from llama_stack_client import RAGDocument
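For reference, the OpenAI-compatible path recommended above looks roughly like the sketch below. It reuses the `client.files.create`, `client.vector_stores.create`, and `client.vector_stores.search` calls shown in the notebook added earlier in this diff; the base URL, file name, embedding model, and query are placeholders.

```python
from io import BytesIO

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # placeholder endpoint

# Upload a document via the Files API; chunking/embedding happens when the
# file is attached to a vector store.
with BytesIO(b"Acme ships globally in 3-5 business days.") as buf:
    buf.name = "shipping_policy.txt"  # placeholder file name
    uploaded = client.files.create(file=buf, purpose="assistants")

# Create an OpenAI-compatible vector store backed by the uploaded file.
vector_store = client.vector_stores.create(
    name="acme_docs",
    file_ids=[uploaded.id],
    embedding_model="sentence-transformers/all-MiniLM-L6-v2",  # placeholder model
    embedding_dimension=384,
)

# Query with the Search API instead of the deprecated RAG Tool.
results = client.vector_stores.search(
    vector_store_id=vector_store.id,
    query="How long does shipping take?",
    max_num_results=2,
)
for result in results.data:
    print(result.content[0].text)
```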
@@ -131,6 +131,7 @@ html_static_path = ["../_static"]
def setup(app):
app.add_css_file("css/my_theme.css")
app.add_js_file("js/detect_theme.js")
app.add_js_file("js/horizontal_nav.js")
app.add_js_file("js/keyboard_shortcuts.js")

def dockerhub_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
@@ -35,5 +35,5 @@ testing/record-replay

### Benchmarking

```{include} ../../../docs/source/distributions/k8s-benchmark/README.md
```{include} ../../../benchmarking/k8s-benchmark/README.md
```
@@ -18,6 +18,7 @@ This section contains documentation for all available providers for the **infere
inline_meta-reference
inline_sentence-transformers
remote_anthropic
remote_azure
remote_bedrock
remote_cerebras
remote_databricks
docs/source/providers/inference/remote_azure.md (new file): 29 lines

@@ -0,0 +1,29 @@
# remote::azure

## Description

Azure OpenAI inference provider for accessing GPT models and other Azure services.
Provider documentation: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview

## Configuration

| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `<class 'pydantic.types.SecretStr'>` | No | | Azure API key for Azure |
| `api_base` | `<class 'pydantic.networks.HttpUrl'>` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) |
| `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) |
| `api_type` | `str \| None` | No | azure | Azure API type for Azure (e.g., azure) |

## Sample Configuration

```yaml
api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=}

```
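For completeness, the environment variables referenced by the sample configuration could be set like this before starting the stack; every value below is a placeholder taken from the examples in the table above.

```bash
# Placeholders; substitute your Azure OpenAI resource details.
export AZURE_API_KEY="<your-azure-openai-api-key>"
export AZURE_API_BASE="https://your-resource-name.openai.azure.com"
export AZURE_API_VERSION="2024-12-01-preview"
export AZURE_API_TYPE="azure"
```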
@ -48,15 +48,12 @@ def setup_verify_download_parser(parser: argparse.ArgumentParser) -> None:
|
|||
parser.set_defaults(func=partial(run_verify_cmd, parser=parser))
|
||||
|
||||
|
||||
def calculate_md5(filepath: Path, chunk_size: int = 8192) -> str:
|
||||
# NOTE: MD5 is used here only for download integrity verification,
|
||||
# not for security purposes
|
||||
# TODO: switch to SHA256
|
||||
md5_hash = hashlib.md5(usedforsecurity=False)
|
||||
def calculate_sha256(filepath: Path, chunk_size: int = 8192) -> str:
|
||||
sha256_hash = hashlib.sha256()
|
||||
with open(filepath, "rb") as f:
|
||||
for chunk in iter(lambda: f.read(chunk_size), b""):
|
||||
md5_hash.update(chunk)
|
||||
return md5_hash.hexdigest()
|
||||
sha256_hash.update(chunk)
|
||||
return sha256_hash.hexdigest()
|
||||
|
||||
|
||||
def load_checksums(checklist_path: Path) -> dict[str, str]:
|
||||
|
@ -64,10 +61,10 @@ def load_checksums(checklist_path: Path) -> dict[str, str]:
|
|||
with open(checklist_path) as f:
|
||||
for line in f:
|
||||
if line.strip():
|
||||
md5sum, filepath = line.strip().split(" ", 1)
|
||||
sha256sum, filepath = line.strip().split(" ", 1)
|
||||
# Remove leading './' if present
|
||||
filepath = filepath.lstrip("./")
|
||||
checksums[filepath] = md5sum
|
||||
checksums[filepath] = sha256sum
|
||||
return checksums
|
||||
|
||||
|
||||
|
@ -88,7 +85,7 @@ def verify_files(model_dir: Path, checksums: dict[str, str], console: Console) -
|
|||
matches = False
|
||||
|
||||
if exists:
|
||||
actual_hash = calculate_md5(full_path)
|
||||
actual_hash = calculate_sha256(full_path)
|
||||
matches = actual_hash == expected_hash
|
||||
|
||||
results.append(
|
||||
|
|
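For context, `load_checksums` expects lines in the `<hash> <path>` form that the `sha256sum` utility emits. A hedged sketch of producing and spot-checking such a checklist; the directory and checklist name are illustrative, not taken from the CLI.

```bash
# Illustrative: generate a checklist in the "<sha256>  ./<file>" format that
# load_checksums() parses, then verify it with the same tool.
cd ~/.llama/checkpoints/my-model   # placeholder model directory
find . -maxdepth 1 -type f ! -name checklist.chk -exec sha256sum {} + > checklist.chk
sha256sum --check checklist.chk
```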
@@ -431,6 +431,12 @@ class ServerConfig(BaseModel):
)


class InferenceStoreConfig(BaseModel):
sql_store_config: SqlStoreConfig
max_write_queue_size: int = Field(default=10000, description="Max queued writes for inference store")
num_writers: int = Field(default=4, description="Number of concurrent background writers")


class StackRunConfig(BaseModel):
version: int = LLAMA_STACK_RUN_CONFIG_VERSION

@@ -464,11 +470,12 @@ Configuration for the persistence store used by the distribution registry. If no
a default SQLite store will be used.""",
)

inference_store: SqlStoreConfig | None = Field(
inference_store: InferenceStoreConfig | SqlStoreConfig | None = Field(
default=None,
description="""
Configuration for the persistence store used by the inference API. If not specified,
a default SQLite store will be used.""",
Configuration for the persistence store used by the inference API. Can be either a
InferenceStoreConfig (with queue tuning parameters) or a SqlStoreConfig (deprecated).
If not specified, a default SQLite store will be used.""",
)

# registry of "resources" in the distribution
|
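For context, the `inference_store` field above now accepts either the legacy `SqlStoreConfig` or the new `InferenceStoreConfig` wrapper with write-queue tuning. A hedged sketch of both shapes (import paths taken from elsewhere in this diff; db paths are illustrative):

```python
from llama_stack.core.datatypes import InferenceStoreConfig
from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig

# Legacy (deprecated): pass a SqlStoreConfig directly; queue defaults are applied.
legacy = SqliteSqlStoreConfig(db_path="/tmp/inference_store.db")

# New: wrap the SQL store config and tune the background write queue.
tuned = InferenceStoreConfig(
    sql_store_config=SqliteSqlStoreConfig(db_path="/tmp/inference_store.db"),
    max_write_queue_size=20000,
    num_writers=8,
)
```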
@ -78,7 +78,10 @@ async def get_auto_router_impl(
|
|||
|
||||
# TODO: move pass configs to routers instead
|
||||
if api == Api.inference and run_config.inference_store:
|
||||
inference_store = InferenceStore(run_config.inference_store, policy)
|
||||
inference_store = InferenceStore(
|
||||
config=run_config.inference_store,
|
||||
policy=policy,
|
||||
)
|
||||
await inference_store.initialize()
|
||||
api_to_dep_impl["store"] = inference_store
|
||||
|
||||
|
|
|
@ -63,7 +63,7 @@ from llama_stack.models.llama.llama3.chat_format import ChatFormat
|
|||
from llama_stack.models.llama.llama3.tokenizer import Tokenizer
|
||||
from llama_stack.providers.datatypes import HealthResponse, HealthStatus, RoutingTable
|
||||
from llama_stack.providers.utils.inference.inference_store import InferenceStore
|
||||
from llama_stack.providers.utils.telemetry.tracing import get_current_span
|
||||
from llama_stack.providers.utils.telemetry.tracing import enqueue_event, get_current_span
|
||||
|
||||
logger = get_logger(name=__name__, category="core::routers")
|
||||
|
||||
|
@ -90,6 +90,11 @@ class InferenceRouter(Inference):
|
|||
|
||||
async def shutdown(self) -> None:
|
||||
logger.debug("InferenceRouter.shutdown")
|
||||
if self.store:
|
||||
try:
|
||||
await self.store.shutdown()
|
||||
except Exception as e:
|
||||
logger.warning(f"Error during InferenceStore shutdown: {e}")
|
||||
|
||||
async def register_model(
|
||||
self,
|
||||
|
@ -160,7 +165,7 @@ class InferenceRouter(Inference):
|
|||
metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model)
|
||||
if self.telemetry:
|
||||
for metric in metrics:
|
||||
await self.telemetry.log_event(metric)
|
||||
enqueue_event(metric)
|
||||
return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics]
|
||||
|
||||
async def _count_tokens(
|
||||
|
@ -431,7 +436,7 @@ class InferenceRouter(Inference):
|
|||
model=model_obj,
|
||||
)
|
||||
for metric in metrics:
|
||||
await self.telemetry.log_event(metric)
|
||||
enqueue_event(metric)
|
||||
|
||||
# these metrics will show up in the client response.
|
||||
response.metrics = (
|
||||
|
@ -537,7 +542,7 @@ class InferenceRouter(Inference):
|
|||
model=model_obj,
|
||||
)
|
||||
for metric in metrics:
|
||||
await self.telemetry.log_event(metric)
|
||||
enqueue_event(metric)
|
||||
# these metrics will show up in the client response.
|
||||
response.metrics = (
|
||||
metrics if not hasattr(response, "metrics") or response.metrics is None else response.metrics + metrics
|
||||
|
@ -664,7 +669,7 @@ class InferenceRouter(Inference):
|
|||
"completion_tokens",
|
||||
"total_tokens",
|
||||
]: # Only log completion and total tokens
|
||||
await self.telemetry.log_event(metric)
|
||||
enqueue_event(metric)
|
||||
|
||||
# Return metrics in response
|
||||
async_metrics = [
|
||||
|
@ -710,7 +715,7 @@ class InferenceRouter(Inference):
|
|||
)
|
||||
for metric in completion_metrics:
|
||||
if metric.metric in ["completion_tokens", "total_tokens"]: # Only log completion and total tokens
|
||||
await self.telemetry.log_event(metric)
|
||||
enqueue_event(metric)
|
||||
|
||||
# Return metrics in response
|
||||
return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in completion_metrics]
|
||||
|
@ -806,7 +811,7 @@ class InferenceRouter(Inference):
|
|||
model=model,
|
||||
)
|
||||
for metric in metrics:
|
||||
await self.telemetry.log_event(metric)
|
||||
enqueue_event(metric)
|
||||
|
||||
yield chunk
|
||||
finally:
|
||||
|
|
|
@ -17,6 +17,7 @@ distribution_spec:
|
|||
- provider_type: remote::vertexai
|
||||
- provider_type: remote::groq
|
||||
- provider_type: remote::sambanova
|
||||
- provider_type: remote::azure
|
||||
- provider_type: inline::sentence-transformers
|
||||
vector_io:
|
||||
- provider_type: inline::faiss
|
||||
|
|
|
@ -81,6 +81,13 @@ providers:
|
|||
config:
|
||||
url: https://api.sambanova.ai/v1
|
||||
api_key: ${env.SAMBANOVA_API_KEY:=}
|
||||
- provider_id: ${env.AZURE_API_KEY:+azure}
|
||||
provider_type: remote::azure
|
||||
config:
|
||||
api_key: ${env.AZURE_API_KEY:=}
|
||||
api_base: ${env.AZURE_API_BASE:=}
|
||||
api_version: ${env.AZURE_API_VERSION:=}
|
||||
api_type: ${env.AZURE_API_TYPE:=}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
vector_io:
|
||||
|
|
|
@ -18,6 +18,7 @@ distribution_spec:
|
|||
- provider_type: remote::vertexai
|
||||
- provider_type: remote::groq
|
||||
- provider_type: remote::sambanova
|
||||
- provider_type: remote::azure
|
||||
- provider_type: inline::sentence-transformers
|
||||
vector_io:
|
||||
- provider_type: inline::faiss
|
||||
|
|
|
@ -81,6 +81,13 @@ providers:
|
|||
config:
|
||||
url: https://api.sambanova.ai/v1
|
||||
api_key: ${env.SAMBANOVA_API_KEY:=}
|
||||
- provider_id: ${env.AZURE_API_KEY:+azure}
|
||||
provider_type: remote::azure
|
||||
config:
|
||||
api_key: ${env.AZURE_API_KEY:=}
|
||||
api_base: ${env.AZURE_API_BASE:=}
|
||||
api_version: ${env.AZURE_API_VERSION:=}
|
||||
api_type: ${env.AZURE_API_TYPE:=}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
vector_io:
|
||||
|
|
|
@ -18,6 +18,7 @@ distribution_spec:
|
|||
- provider_type: remote::vertexai
|
||||
- provider_type: remote::groq
|
||||
- provider_type: remote::sambanova
|
||||
- provider_type: remote::azure
|
||||
- provider_type: inline::sentence-transformers
|
||||
vector_io:
|
||||
- provider_type: inline::faiss
|
||||
|
|
|
@ -81,6 +81,13 @@ providers:
|
|||
config:
|
||||
url: https://api.sambanova.ai/v1
|
||||
api_key: ${env.SAMBANOVA_API_KEY:=}
|
||||
- provider_id: ${env.AZURE_API_KEY:+azure}
|
||||
provider_type: remote::azure
|
||||
config:
|
||||
api_key: ${env.AZURE_API_KEY:=}
|
||||
api_base: ${env.AZURE_API_BASE:=}
|
||||
api_version: ${env.AZURE_API_VERSION:=}
|
||||
api_type: ${env.AZURE_API_TYPE:=}
|
||||
- provider_id: sentence-transformers
|
||||
provider_type: inline::sentence-transformers
|
||||
vector_io:
|
||||
|
|
|
@ -59,6 +59,7 @@ ENABLED_INFERENCE_PROVIDERS = [
|
|||
"cerebras",
|
||||
"nvidia",
|
||||
"bedrock",
|
||||
"azure",
|
||||
]
|
||||
|
||||
INFERENCE_PROVIDER_IDS = {
|
||||
|
@ -68,6 +69,7 @@ INFERENCE_PROVIDER_IDS = {
|
|||
"cerebras": "${env.CEREBRAS_API_KEY:+cerebras}",
|
||||
"nvidia": "${env.NVIDIA_API_KEY:+nvidia}",
|
||||
"vertexai": "${env.VERTEX_AI_PROJECT:+vertexai}",
|
||||
"azure": "${env.AZURE_API_KEY:+azure}",
|
||||
}
|
||||
|
||||
|
||||
|
@ -277,5 +279,21 @@ def get_distribution_template(name: str = "starter") -> DistributionTemplate:
|
|||
"http://localhost:11434",
|
||||
"Ollama URL",
|
||||
),
|
||||
"AZURE_API_KEY": (
|
||||
"",
|
||||
"Azure API Key",
|
||||
),
|
||||
"AZURE_API_BASE": (
|
||||
"",
|
||||
"Azure API Base",
|
||||
),
|
||||
"AZURE_API_VERSION": (
|
||||
"",
|
||||
"Azure API Version",
|
||||
),
|
||||
"AZURE_API_TYPE": (
|
||||
"azure",
|
||||
"Azure API Type",
|
||||
),
|
||||
},
|
||||
)
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
from jinja2 import Template
|
||||
|
||||
from llama_stack.apis.common.content_types import InterleavedContent
|
||||
from llama_stack.apis.inference import UserMessage
|
||||
from llama_stack.apis.inference import OpenAIUserMessageParam
|
||||
from llama_stack.apis.tools.rag_tool import (
|
||||
DefaultRAGQueryGeneratorConfig,
|
||||
LLMRAGQueryGeneratorConfig,
|
||||
|
@ -61,16 +61,16 @@ async def llm_rag_query_generator(
|
|||
messages = [interleaved_content_as_str(content)]
|
||||
|
||||
template = Template(config.template)
|
||||
content = template.render({"messages": messages})
|
||||
rendered_content: str = template.render({"messages": messages})
|
||||
|
||||
model = config.model
|
||||
message = UserMessage(content=content)
|
||||
response = await inference_api.chat_completion(
|
||||
model_id=model,
|
||||
message = OpenAIUserMessageParam(content=rendered_content)
|
||||
response = await inference_api.openai_chat_completion(
|
||||
model=model,
|
||||
messages=[message],
|
||||
stream=False,
|
||||
)
|
||||
|
||||
query = response.completion_message.content
|
||||
query = response.choices[0].message.content
|
||||
|
||||
return query
|
||||
|
|
|
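The query generator above now goes through the OpenAI-compatible chat API instead of the legacy `chat_completion` path. A hedged sketch of the new call shape; the model id is a placeholder and `inference_api` stands for whatever Inference implementation is wired in:

```python
from llama_stack.apis.inference import OpenAIUserMessageParam


async def generate_query(inference_api, rendered_content: str) -> str:
    message = OpenAIUserMessageParam(content=rendered_content)
    response = await inference_api.openai_chat_completion(
        model="meta-llama/Llama-3.3-70B-Instruct",  # placeholder model id
        messages=[message],
        stream=False,
    )
    # OpenAI-style responses carry the generated text under choices[0].message.content.
    return response.choices[0].message.content
```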
@ -45,10 +45,7 @@ from llama_stack.apis.vector_io import (
|
|||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.datatypes import ToolGroupsProtocolPrivate
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import interleaved_content_as_str
|
||||
from llama_stack.providers.utils.memory.vector_store import (
|
||||
content_from_doc,
|
||||
parse_data_url,
|
||||
)
|
||||
from llama_stack.providers.utils.memory.vector_store import parse_data_url
|
||||
|
||||
from .config import RagToolRuntimeConfig
|
||||
from .context_retriever import generate_rag_query
|
||||
|
@ -60,6 +57,47 @@ def make_random_string(length: int = 8):
|
|||
return "".join(secrets.choice(string.ascii_letters + string.digits) for _ in range(length))
|
||||
|
||||
|
||||
async def raw_data_from_doc(doc: RAGDocument) -> tuple[bytes, str]:
|
||||
"""Get raw binary data and mime type from a RAGDocument for file upload."""
|
||||
if isinstance(doc.content, URL):
|
||||
if doc.content.uri.startswith("data:"):
|
||||
parts = parse_data_url(doc.content.uri)
|
||||
mime_type = parts["mimetype"]
|
||||
data = parts["data"]
|
||||
|
||||
if parts["is_base64"]:
|
||||
file_data = base64.b64decode(data)
|
||||
else:
|
||||
file_data = data.encode("utf-8")
|
||||
|
||||
return file_data, mime_type
|
||||
else:
|
||||
async with httpx.AsyncClient() as client:
|
||||
r = await client.get(doc.content.uri)
|
||||
r.raise_for_status()
|
||||
mime_type = r.headers.get("content-type", "application/octet-stream")
|
||||
return r.content, mime_type
|
||||
else:
|
||||
if isinstance(doc.content, str):
|
||||
content_str = doc.content
|
||||
else:
|
||||
content_str = interleaved_content_as_str(doc.content)
|
||||
|
||||
if content_str.startswith("data:"):
|
||||
parts = parse_data_url(content_str)
|
||||
mime_type = parts["mimetype"]
|
||||
data = parts["data"]
|
||||
|
||||
if parts["is_base64"]:
|
||||
file_data = base64.b64decode(data)
|
||||
else:
|
||||
file_data = data.encode("utf-8")
|
||||
|
||||
return file_data, mime_type
|
||||
else:
|
||||
return content_str.encode("utf-8"), "text/plain"
|
||||
|
||||
|
||||
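A hedged usage sketch of `raw_data_from_doc` for the inline data-URL case; the `RAGDocument` import path and constructor fields are assumed from how the document objects are used in this file, and the snippet must run inside an event loop:

```python
from llama_stack.apis.tools import RAGDocument  # import path assumed


async def demo() -> None:
    doc = RAGDocument(
        document_id="doc-1",
        content="data:text/plain;base64,aGVsbG8gd29ybGQ=",  # base64 of "hello world"
        metadata={},
    )
    data, mime_type = await raw_data_from_doc(doc)
    assert data == b"hello world"
    assert mime_type == "text/plain"
```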
class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRuntime):
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -95,46 +133,52 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
|
|||
return
|
||||
|
||||
for doc in documents:
|
||||
if isinstance(doc.content, URL):
|
||||
if doc.content.uri.startswith("data:"):
|
||||
parts = parse_data_url(doc.content.uri)
|
||||
file_data = base64.b64decode(parts["data"]) if parts["is_base64"] else parts["data"].encode()
|
||||
mime_type = parts["mimetype"]
|
||||
else:
|
||||
async with httpx.AsyncClient() as client:
|
||||
response = await client.get(doc.content.uri)
|
||||
file_data = response.content
|
||||
mime_type = doc.mime_type or response.headers.get("content-type", "application/octet-stream")
|
||||
else:
|
||||
content_str = await content_from_doc(doc)
|
||||
file_data = content_str.encode("utf-8")
|
||||
mime_type = doc.mime_type or "text/plain"
|
||||
try:
|
||||
try:
|
||||
file_data, mime_type = await raw_data_from_doc(doc)
|
||||
except Exception as e:
|
||||
log.error(f"Failed to extract content from document {doc.document_id}: {e}")
|
||||
continue
|
||||
|
||||
file_extension = mimetypes.guess_extension(mime_type) or ".txt"
|
||||
filename = doc.metadata.get("filename", f"{doc.document_id}{file_extension}")
|
||||
file_extension = mimetypes.guess_extension(mime_type) or ".txt"
|
||||
filename = doc.metadata.get("filename", f"{doc.document_id}{file_extension}")
|
||||
|
||||
file_obj = io.BytesIO(file_data)
|
||||
file_obj.name = filename
|
||||
file_obj = io.BytesIO(file_data)
|
||||
file_obj.name = filename
|
||||
|
||||
upload_file = UploadFile(file=file_obj, filename=filename)
|
||||
upload_file = UploadFile(file=file_obj, filename=filename)
|
||||
|
||||
created_file = await self.files_api.openai_upload_file(
|
||||
file=upload_file, purpose=OpenAIFilePurpose.ASSISTANTS
|
||||
)
|
||||
try:
|
||||
created_file = await self.files_api.openai_upload_file(
|
||||
file=upload_file, purpose=OpenAIFilePurpose.ASSISTANTS
|
||||
)
|
||||
except Exception as e:
|
||||
log.error(f"Failed to upload file for document {doc.document_id}: {e}")
|
||||
continue
|
||||
|
||||
chunking_strategy = VectorStoreChunkingStrategyStatic(
|
||||
static=VectorStoreChunkingStrategyStaticConfig(
|
||||
max_chunk_size_tokens=chunk_size_in_tokens,
|
||||
chunk_overlap_tokens=chunk_size_in_tokens // 4,
|
||||
chunking_strategy = VectorStoreChunkingStrategyStatic(
|
||||
static=VectorStoreChunkingStrategyStaticConfig(
|
||||
max_chunk_size_tokens=chunk_size_in_tokens,
|
||||
chunk_overlap_tokens=chunk_size_in_tokens // 4,
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
await self.vector_io_api.openai_attach_file_to_vector_store(
|
||||
vector_store_id=vector_db_id,
|
||||
file_id=created_file.id,
|
||||
attributes=doc.metadata,
|
||||
chunking_strategy=chunking_strategy,
|
||||
)
|
||||
try:
|
||||
await self.vector_io_api.openai_attach_file_to_vector_store(
|
||||
vector_store_id=vector_db_id,
|
||||
file_id=created_file.id,
|
||||
attributes=doc.metadata,
|
||||
chunking_strategy=chunking_strategy,
|
||||
)
|
||||
except Exception as e:
|
||||
log.error(
|
||||
f"Failed to attach file {created_file.id} to vector store {vector_db_id} for document {doc.document_id}: {e}"
|
||||
)
|
||||
continue
|
||||
|
||||
except Exception as e:
|
||||
log.error(f"Unexpected error processing document {doc.document_id}: {e}")
|
||||
continue
|
||||
|
||||
async def query(
|
||||
self,
|
||||
|
@ -274,7 +318,6 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
|
|||
if query_config:
|
||||
query_config = TypeAdapter(RAGQueryConfig).validate_python(query_config)
|
||||
else:
|
||||
# handle someone passing an empty dict
|
||||
query_config = RAGQueryConfig()
|
||||
|
||||
query = kwargs["query"]
|
||||
|
@ -285,6 +328,6 @@ class MemoryToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, RAGToolRunti
|
|||
)
|
||||
|
||||
return ToolInvocationResult(
|
||||
content=result.content,
|
||||
content=result.content or [],
|
||||
metadata=result.metadata,
|
||||
)
|
||||
|
|
|
@ -13,7 +13,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
InlineProviderSpec(
|
||||
api=Api.batches,
|
||||
provider_type="inline::reference",
|
||||
pip_packages=["openai"],
|
||||
pip_packages=[],
|
||||
module="llama_stack.providers.inline.batches.reference",
|
||||
config_class="llama_stack.providers.inline.batches.reference.config.ReferenceBatchesImplConfig",
|
||||
api_dependencies=[
|
||||
|
|
|
@ -75,7 +75,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
api=Api.inference,
|
||||
adapter=AdapterSpec(
|
||||
adapter_type="vllm",
|
||||
pip_packages=["openai"],
|
||||
pip_packages=[],
|
||||
module="llama_stack.providers.remote.inference.vllm",
|
||||
config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig",
|
||||
description="Remote vLLM inference provider for connecting to vLLM servers.",
|
||||
|
@ -151,9 +151,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
api=Api.inference,
|
||||
adapter=AdapterSpec(
|
||||
adapter_type="databricks",
|
||||
pip_packages=[
|
||||
"openai",
|
||||
],
|
||||
pip_packages=[],
|
||||
module="llama_stack.providers.remote.inference.databricks",
|
||||
config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
|
||||
description="Databricks inference provider for running models on Databricks' unified analytics platform.",
|
||||
|
@ -163,9 +161,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
api=Api.inference,
|
||||
adapter=AdapterSpec(
|
||||
adapter_type="nvidia",
|
||||
pip_packages=[
|
||||
"openai",
|
||||
],
|
||||
pip_packages=[],
|
||||
module="llama_stack.providers.remote.inference.nvidia",
|
||||
config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig",
|
||||
description="NVIDIA inference provider for accessing NVIDIA NIM models and AI services.",
|
||||
|
@ -175,7 +171,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
api=Api.inference,
|
||||
adapter=AdapterSpec(
|
||||
adapter_type="runpod",
|
||||
pip_packages=["openai"],
|
||||
pip_packages=[],
|
||||
module="llama_stack.providers.remote.inference.runpod",
|
||||
config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig",
|
||||
description="RunPod inference provider for running models on RunPod's cloud GPU platform.",
|
||||
|
@ -207,7 +203,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
api=Api.inference,
|
||||
adapter=AdapterSpec(
|
||||
adapter_type="gemini",
|
||||
pip_packages=["litellm", "openai"],
|
||||
pip_packages=["litellm"],
|
||||
module="llama_stack.providers.remote.inference.gemini",
|
||||
config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator",
|
||||
|
@ -218,7 +214,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
api=Api.inference,
|
||||
adapter=AdapterSpec(
|
||||
adapter_type="vertexai",
|
||||
pip_packages=["litellm", "google-cloud-aiplatform", "openai"],
|
||||
pip_packages=["litellm", "google-cloud-aiplatform"],
|
||||
module="llama_stack.providers.remote.inference.vertexai",
|
||||
config_class="llama_stack.providers.remote.inference.vertexai.VertexAIConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.vertexai.config.VertexAIProviderDataValidator",
|
||||
|
@ -248,7 +244,7 @@ Available Models:
|
|||
api=Api.inference,
|
||||
adapter=AdapterSpec(
|
||||
adapter_type="groq",
|
||||
pip_packages=["litellm", "openai"],
|
||||
pip_packages=["litellm"],
|
||||
module="llama_stack.providers.remote.inference.groq",
|
||||
config_class="llama_stack.providers.remote.inference.groq.GroqConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator",
|
||||
|
@ -270,7 +266,7 @@ Available Models:
|
|||
api=Api.inference,
|
||||
adapter=AdapterSpec(
|
||||
adapter_type="sambanova",
|
||||
pip_packages=["litellm", "openai"],
|
||||
pip_packages=["litellm"],
|
||||
module="llama_stack.providers.remote.inference.sambanova",
|
||||
config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator",
|
||||
|
@ -299,4 +295,19 @@ Available Models:
|
|||
description="IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
api=Api.inference,
|
||||
adapter=AdapterSpec(
|
||||
adapter_type="azure",
|
||||
pip_packages=["litellm"],
|
||||
module="llama_stack.providers.remote.inference.azure",
|
||||
config_class="llama_stack.providers.remote.inference.azure.AzureConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.azure.config.AzureProviderDataValidator",
|
||||
description="""
|
||||
Azure OpenAI inference provider for accessing GPT models and other Azure services.
|
||||
Provider documentation
|
||||
https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
|
||||
""",
|
||||
),
|
||||
),
|
||||
]
|
||||
|
|
|
@ -38,7 +38,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
InlineProviderSpec(
|
||||
api=Api.scoring,
|
||||
provider_type="inline::braintrust",
|
||||
pip_packages=["autoevals", "openai"],
|
||||
pip_packages=["autoevals"],
|
||||
module="llama_stack.providers.inline.scoring.braintrust",
|
||||
config_class="llama_stack.providers.inline.scoring.braintrust.BraintrustScoringConfig",
|
||||
api_dependencies=[
|
||||
|
|
15
llama_stack/providers/remote/inference/azure/__init__.py
Normal file
|
@ -0,0 +1,15 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .config import AzureConfig
|
||||
|
||||
|
||||
async def get_adapter_impl(config: AzureConfig, _deps):
|
||||
from .azure import AzureInferenceAdapter
|
||||
|
||||
impl = AzureInferenceAdapter(config)
|
||||
await impl.initialize()
|
||||
return impl
|
64
llama_stack/providers/remote/inference/azure/azure.py
Normal file
|
@ -0,0 +1,64 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Any
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from llama_stack.apis.inference import ChatCompletionRequest
|
||||
from llama_stack.providers.utils.inference.litellm_openai_mixin import (
|
||||
LiteLLMOpenAIMixin,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
|
||||
from .config import AzureConfig
|
||||
from .models import MODEL_ENTRIES
|
||||
|
||||
|
||||
class AzureInferenceAdapter(OpenAIMixin, LiteLLMOpenAIMixin):
|
||||
def __init__(self, config: AzureConfig) -> None:
|
||||
LiteLLMOpenAIMixin.__init__(
|
||||
self,
|
||||
MODEL_ENTRIES,
|
||||
litellm_provider_name="azure",
|
||||
api_key_from_config=config.api_key.get_secret_value(),
|
||||
provider_data_api_key_field="azure_api_key",
|
||||
openai_compat_api_base=str(config.api_base),
|
||||
)
|
||||
self.config = config
|
||||
|
||||
# Delegate the client data handling get_api_key method to LiteLLMOpenAIMixin
|
||||
get_api_key = LiteLLMOpenAIMixin.get_api_key
|
||||
|
||||
def get_base_url(self) -> str:
|
||||
"""
|
||||
Get the Azure API base URL.
|
||||
|
||||
Returns the Azure API base URL from the configuration.
|
||||
"""
|
||||
return urljoin(str(self.config.api_base), "/openai/v1")
|
||||
|
||||
async def _get_params(self, request: ChatCompletionRequest) -> dict[str, Any]:
|
||||
# Get base parameters from parent
|
||||
params = await super()._get_params(request)
|
||||
|
||||
# Add Azure specific parameters
|
||||
provider_data = self.get_request_provider_data()
|
||||
if provider_data:
|
||||
if getattr(provider_data, "azure_api_key", None):
|
||||
params["api_key"] = provider_data.azure_api_key
|
||||
if getattr(provider_data, "azure_api_base", None):
|
||||
params["api_base"] = provider_data.azure_api_base
|
||||
if getattr(provider_data, "azure_api_version", None):
|
||||
params["api_version"] = provider_data.azure_api_version
|
||||
if getattr(provider_data, "azure_api_type", None):
|
||||
params["api_type"] = provider_data.azure_api_type
|
||||
else:
|
||||
params["api_key"] = self.config.api_key.get_secret_value()
|
||||
params["api_base"] = str(self.config.api_base)
|
||||
params["api_version"] = self.config.api_version
|
||||
params["api_type"] = self.config.api_type
|
||||
|
||||
return params
|
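The `/openai/v1` join in `get_base_url` above relies on standard `urllib.parse.urljoin` semantics; a quick illustration with a placeholder resource name:

```python
from urllib.parse import urljoin

base = "https://your-resource-name.openai.azure.com"  # placeholder
assert urljoin(base, "/openai/v1") == "https://your-resource-name.openai.azure.com/openai/v1"
# A trailing slash on the configured base yields the same result.
assert urljoin(base + "/", "/openai/v1") == "https://your-resource-name.openai.azure.com/openai/v1"
```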
63
llama_stack/providers/remote/inference/azure/config.py
Normal file
|
@ -0,0 +1,63 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field, HttpUrl, SecretStr
|
||||
|
||||
from llama_stack.schema_utils import json_schema_type
|
||||
|
||||
|
||||
class AzureProviderDataValidator(BaseModel):
|
||||
azure_api_key: SecretStr = Field(
|
||||
description="Azure API key for Azure",
|
||||
)
|
||||
azure_api_base: HttpUrl = Field(
|
||||
description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com)",
|
||||
)
|
||||
azure_api_version: str | None = Field(
|
||||
default=None,
|
||||
description="Azure API version for Azure (e.g., 2024-06-01)",
|
||||
)
|
||||
azure_api_type: str | None = Field(
|
||||
default="azure",
|
||||
description="Azure API type for Azure (e.g., azure)",
|
||||
)
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class AzureConfig(BaseModel):
|
||||
api_key: SecretStr = Field(
|
||||
description="Azure API key for Azure",
|
||||
)
|
||||
api_base: HttpUrl = Field(
|
||||
description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com)",
|
||||
)
|
||||
api_version: str | None = Field(
|
||||
default_factory=lambda: os.getenv("AZURE_API_VERSION"),
|
||||
description="Azure API version for Azure (e.g., 2024-12-01-preview)",
|
||||
)
|
||||
api_type: str | None = Field(
|
||||
default_factory=lambda: os.getenv("AZURE_API_TYPE", "azure"),
|
||||
description="Azure API type for Azure (e.g., azure)",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(
|
||||
cls,
|
||||
api_key: str = "${env.AZURE_API_KEY:=}",
|
||||
api_base: str = "${env.AZURE_API_BASE:=}",
|
||||
api_version: str = "${env.AZURE_API_VERSION:=}",
|
||||
api_type: str = "${env.AZURE_API_TYPE:=}",
|
||||
**kwargs,
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"api_key": api_key,
|
||||
"api_base": api_base,
|
||||
"api_version": api_version,
|
||||
"api_type": api_type,
|
||||
}
|
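A hedged sketch of constructing `AzureConfig` directly instead of via the environment expansion shown in `sample_run_config`; all values are placeholders, and pydantic coerces the plain strings into `SecretStr`/`HttpUrl`:

```python
from llama_stack.providers.remote.inference.azure import AzureConfig

config = AzureConfig(
    api_key="your-azure-api-key",  # placeholder
    api_base="https://your-resource-name.openai.azure.com",  # placeholder
    api_version="2024-12-01-preview",
)
print(config.api_type)  # defaults to "azure" unless AZURE_API_TYPE is set in the environment
```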
28
llama_stack/providers/remote/inference/azure/models.py
Normal file
|
@ -0,0 +1,28 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.providers.utils.inference.model_registry import (
|
||||
ProviderModelEntry,
|
||||
)
|
||||
|
||||
# https://learn.microsoft.com/en-us/azure/ai-foundry/openai/concepts/models?tabs=global-standard%2Cstandard-chat-completions
|
||||
LLM_MODEL_IDS = [
|
||||
"gpt-5",
|
||||
"gpt-5-mini",
|
||||
"gpt-5-nano",
|
||||
"gpt-5-chat",
|
||||
"o1",
|
||||
"o1-mini",
|
||||
"o3-mini",
|
||||
"o4-mini",
|
||||
"gpt-4.1",
|
||||
"gpt-4.1-mini",
|
||||
"gpt-4.1-nano",
|
||||
]
|
||||
|
||||
SAFETY_MODELS_ENTRIES = list[ProviderModelEntry]()
|
||||
|
||||
MODEL_ENTRIES = [ProviderModelEntry(provider_model_id=m) for m in LLM_MODEL_IDS] + SAFETY_MODELS_ENTRIES
|
|
@ -53,6 +53,43 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
|
|||
|
||||
from .models import MODEL_ENTRIES
|
||||
|
||||
REGION_PREFIX_MAP = {
|
||||
"us": "us.",
|
||||
"eu": "eu.",
|
||||
"ap": "ap.",
|
||||
}
|
||||
|
||||
|
||||
def _get_region_prefix(region: str | None) -> str:
|
||||
# AWS requires region prefixes for inference profiles
|
||||
if region is None:
|
||||
return "us." # default to US when we don't know
|
||||
|
||||
# Handle case insensitive region matching
|
||||
region_lower = region.lower()
|
||||
for prefix in REGION_PREFIX_MAP:
|
||||
if region_lower.startswith(f"{prefix}-"):
|
||||
return REGION_PREFIX_MAP[prefix]
|
||||
|
||||
# Fallback to US for anything we don't recognize
|
||||
return "us."
|
||||
|
||||
|
||||
def _to_inference_profile_id(model_id: str, region: str = None) -> str:
|
||||
# Return ARNs unchanged
|
||||
if model_id.startswith("arn:"):
|
||||
return model_id
|
||||
|
||||
# Return inference profile IDs that already have regional prefixes
|
||||
if any(model_id.startswith(p) for p in REGION_PREFIX_MAP.values()):
|
||||
return model_id
|
||||
|
||||
# Default to US East when no region is provided
|
||||
if region is None:
|
||||
region = "us-east-1"
|
||||
|
||||
return _get_region_prefix(region) + model_id
|
||||
|
||||
|
||||
class BedrockInferenceAdapter(
|
||||
ModelRegistryHelper,
|
||||
|
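Illustrative expectations for the region-prefix helper added above; the model ids are examples only and the import path for the private helper is assumed:

```python
from llama_stack.providers.remote.inference.bedrock.bedrock import _to_inference_profile_id  # path assumed

# Foundation model ids get a regional prefix derived from the boto3 client's region.
assert _to_inference_profile_id("meta.llama3-1-8b-instruct-v1:0", "eu-west-1") == "eu.meta.llama3-1-8b-instruct-v1:0"
# No region (or an unrecognized one) falls back to the US prefix.
assert _to_inference_profile_id("meta.llama3-1-8b-instruct-v1:0") == "us.meta.llama3-1-8b-instruct-v1:0"
# Already-prefixed profile ids and ARNs pass through unchanged.
assert _to_inference_profile_id("us.meta.llama3-1-8b-instruct-v1:0", "ap-south-1") == "us.meta.llama3-1-8b-instruct-v1:0"
assert _to_inference_profile_id("arn:aws:bedrock:us-east-1::foundation-model/x", "eu-west-1") == "arn:aws:bedrock:us-east-1::foundation-model/x"
```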
@ -166,8 +203,13 @@ class BedrockInferenceAdapter(
|
|||
options["repetition_penalty"] = sampling_params.repetition_penalty
|
||||
|
||||
prompt = await chat_completion_request_to_prompt(request, self.get_llama_model(request.model))
|
||||
|
||||
# Convert foundation model ID to inference profile ID
|
||||
region_name = self.client.meta.region_name
|
||||
inference_profile_id = _to_inference_profile_id(bedrock_model, region_name)
|
||||
|
||||
return {
|
||||
"modelId": bedrock_model,
|
||||
"modelId": inference_profile_id,
|
||||
"body": json.dumps(
|
||||
{
|
||||
"prompt": prompt,
|
||||
|
@ -185,6 +227,11 @@ class BedrockInferenceAdapter(
|
|||
task_type: EmbeddingTaskType | None = None,
|
||||
) -> EmbeddingsResponse:
|
||||
model = await self.model_store.get_model(model_id)
|
||||
|
||||
# Convert foundation model ID to inference profile ID
|
||||
region_name = self.client.meta.region_name
|
||||
inference_profile_id = _to_inference_profile_id(model.provider_resource_id, region_name)
|
||||
|
||||
embeddings = []
|
||||
for content in contents:
|
||||
assert not content_has_media(content), "Bedrock does not support media for embeddings"
|
||||
|
@ -193,7 +240,7 @@ class BedrockInferenceAdapter(
|
|||
body = json.dumps(input_body)
|
||||
response = self.client.invoke_model(
|
||||
body=body,
|
||||
modelId=model.provider_resource_id,
|
||||
modelId=inference_profile_id,
|
||||
accept="application/json",
|
||||
contentType="application/json",
|
||||
)
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
import json
|
||||
from collections.abc import AsyncGenerator, AsyncIterator
|
||||
from collections.abc import AsyncGenerator
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
@ -38,13 +38,6 @@ from llama_stack.apis.inference import (
|
|||
LogProbConfig,
|
||||
Message,
|
||||
ModelStore,
|
||||
OpenAIChatCompletion,
|
||||
OpenAICompletion,
|
||||
OpenAIEmbeddingData,
|
||||
OpenAIEmbeddingsResponse,
|
||||
OpenAIEmbeddingUsage,
|
||||
OpenAIMessageParam,
|
||||
OpenAIResponseFormatParam,
|
||||
ResponseFormat,
|
||||
SamplingParams,
|
||||
TextTruncation,
|
||||
|
@ -71,11 +64,11 @@ from llama_stack.providers.utils.inference.openai_compat import (
|
|||
convert_message_to_openai_dict,
|
||||
convert_tool_call,
|
||||
get_sampling_options,
|
||||
prepare_openai_completion_params,
|
||||
process_chat_completion_stream_response,
|
||||
process_completion_response,
|
||||
process_completion_stream_response,
|
||||
)
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
from llama_stack.providers.utils.inference.prompt_adapter import (
|
||||
completion_request_to_prompt,
|
||||
content_has_media,
|
||||
|
@ -288,7 +281,7 @@ async def _process_vllm_chat_completion_stream_response(
|
|||
yield c
|
||||
|
||||
|
||||
class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
|
||||
class VLLMInferenceAdapter(OpenAIMixin, Inference, ModelsProtocolPrivate):
|
||||
# automatically set by the resolver when instantiating the provider
|
||||
__provider_id__: str
|
||||
model_store: ModelStore | None = None
|
||||
|
@ -296,7 +289,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
|
|||
def __init__(self, config: VLLMInferenceAdapterConfig) -> None:
|
||||
self.register_helper = ModelRegistryHelper(build_hf_repo_model_entries())
|
||||
self.config = config
|
||||
self.client = None
|
||||
|
||||
async def initialize(self) -> None:
|
||||
if not self.config.url:
|
||||
|
@ -308,8 +300,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
|
|||
return self.config.refresh_models
|
||||
|
||||
async def list_models(self) -> list[Model] | None:
|
||||
self._lazy_initialize_client()
|
||||
assert self.client is not None # mypy
|
||||
models = []
|
||||
async for m in self.client.models.list():
|
||||
model_type = ModelType.llm # unclear how to determine embedding vs. llm models
|
||||
|
@ -340,8 +330,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
|
|||
HealthResponse: A dictionary containing the health status.
|
||||
"""
|
||||
try:
|
||||
client = self._create_client() if self.client is None else self.client
|
||||
_ = [m async for m in client.models.list()] # Ensure the client is initialized
|
||||
_ = [m async for m in self.client.models.list()] # Ensure the client is initialized
|
||||
return HealthResponse(status=HealthStatus.OK)
|
||||
except Exception as e:
|
||||
return HealthResponse(status=HealthStatus.ERROR, message=f"Health check failed: {str(e)}")
|
||||
|
@ -351,19 +340,14 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
|
|||
raise ValueError("Model store not set")
|
||||
return await self.model_store.get_model(model_id)
|
||||
|
||||
def _lazy_initialize_client(self):
|
||||
if self.client is not None:
|
||||
return
|
||||
def get_api_key(self):
|
||||
return self.config.api_token
|
||||
|
||||
log.info(f"Initializing vLLM client with base_url={self.config.url}")
|
||||
self.client = self._create_client()
|
||||
def get_base_url(self):
|
||||
return self.config.url
|
||||
|
||||
def _create_client(self):
|
||||
return AsyncOpenAI(
|
||||
base_url=self.config.url,
|
||||
api_key=self.config.api_token,
|
||||
http_client=httpx.AsyncClient(verify=self.config.tls_verify),
|
||||
)
|
||||
def get_extra_client_params(self):
|
||||
return {"http_client": httpx.AsyncClient(verify=self.config.tls_verify)}
|
||||
|
||||
async def completion(
|
||||
self,
|
||||
|
@ -374,7 +358,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
|
|||
stream: bool | None = False,
|
||||
logprobs: LogProbConfig | None = None,
|
||||
) -> CompletionResponse | AsyncGenerator[CompletionResponseStreamChunk, None]:
|
||||
self._lazy_initialize_client()
|
||||
if sampling_params is None:
|
||||
sampling_params = SamplingParams()
|
||||
model = await self._get_model(model_id)
|
||||
|
@ -406,7 +389,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
|
|||
logprobs: LogProbConfig | None = None,
|
||||
tool_config: ToolConfig | None = None,
|
||||
) -> ChatCompletionResponse | AsyncGenerator[ChatCompletionResponseStreamChunk, None]:
|
||||
self._lazy_initialize_client()
|
||||
if sampling_params is None:
|
||||
sampling_params = SamplingParams()
|
||||
model = await self._get_model(model_id)
|
||||
|
@ -479,16 +461,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
|
|||
yield chunk
|
||||
|
||||
async def register_model(self, model: Model) -> Model:
|
||||
# register_model is called during Llama Stack initialization, hence we cannot init self.client if not initialized yet.
|
||||
# self.client should only be created after the initialization is complete to avoid asyncio cross-context errors.
|
||||
# Changing this may lead to unpredictable behavior.
|
||||
client = self._create_client() if self.client is None else self.client
|
||||
try:
|
||||
model = await self.register_helper.register_model(model)
|
||||
except ValueError:
|
||||
pass # Ignore statically unknown model, will check live listing
|
||||
try:
|
||||
res = await client.models.list()
|
||||
res = await self.client.models.list()
|
||||
except APIConnectionError as e:
|
||||
raise ValueError(
|
||||
f"Failed to connect to vLLM at {self.config.url}. Please check if vLLM is running and accessible at that URL."
|
||||
|
@ -543,8 +521,6 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
|
|||
output_dimension: int | None = None,
|
||||
task_type: EmbeddingTaskType | None = None,
|
||||
) -> EmbeddingsResponse:
|
||||
self._lazy_initialize_client()
|
||||
assert self.client is not None
|
||||
model = await self._get_model(model_id)
|
||||
|
||||
kwargs = {}
|
||||
|
@ -560,154 +536,3 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
|
|||
|
||||
embeddings = [data.embedding for data in response.data]
|
||||
return EmbeddingsResponse(embeddings=embeddings)
|
||||
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
model: str,
|
||||
input: str | list[str],
|
||||
encoding_format: str | None = "float",
|
||||
dimensions: int | None = None,
|
||||
user: str | None = None,
|
||||
) -> OpenAIEmbeddingsResponse:
|
||||
self._lazy_initialize_client()
|
||||
assert self.client is not None
|
||||
model_obj = await self._get_model(model)
|
||||
assert model_obj.model_type == ModelType.embedding
|
||||
|
||||
# Convert input to list if it's a string
|
||||
input_list = [input] if isinstance(input, str) else input
|
||||
|
||||
# Call vLLM embeddings endpoint with encoding_format
|
||||
response = await self.client.embeddings.create(
|
||||
model=model_obj.provider_resource_id,
|
||||
input=input_list,
|
||||
dimensions=dimensions,
|
||||
encoding_format=encoding_format,
|
||||
)
|
||||
|
||||
# Convert response to OpenAI format
|
||||
data = [
|
||||
OpenAIEmbeddingData(
|
||||
embedding=embedding_data.embedding,
|
||||
index=i,
|
||||
)
|
||||
for i, embedding_data in enumerate(response.data)
|
||||
]
|
||||
|
||||
# Not returning actual token usage since vLLM doesn't provide it
|
||||
usage = OpenAIEmbeddingUsage(prompt_tokens=-1, total_tokens=-1)
|
||||
|
||||
return OpenAIEmbeddingsResponse(
|
||||
data=data,
|
||||
model=model_obj.provider_resource_id,
|
||||
usage=usage,
|
||||
)
|
||||
|
||||
async def openai_completion(
|
||||
self,
|
||||
model: str,
|
||||
prompt: str | list[str] | list[int] | list[list[int]],
|
||||
best_of: int | None = None,
|
||||
echo: bool | None = None,
|
||||
frequency_penalty: float | None = None,
|
||||
logit_bias: dict[str, float] | None = None,
|
||||
logprobs: bool | None = None,
|
||||
max_tokens: int | None = None,
|
||||
n: int | None = None,
|
||||
presence_penalty: float | None = None,
|
||||
seed: int | None = None,
|
||||
stop: str | list[str] | None = None,
|
||||
stream: bool | None = None,
|
||||
stream_options: dict[str, Any] | None = None,
|
||||
temperature: float | None = None,
|
||||
top_p: float | None = None,
|
||||
user: str | None = None,
|
||||
guided_choice: list[str] | None = None,
|
||||
prompt_logprobs: int | None = None,
|
||||
suffix: str | None = None,
|
||||
) -> OpenAICompletion:
|
||||
self._lazy_initialize_client()
|
||||
model_obj = await self._get_model(model)
|
||||
|
||||
extra_body: dict[str, Any] = {}
|
||||
if prompt_logprobs is not None and prompt_logprobs >= 0:
|
||||
extra_body["prompt_logprobs"] = prompt_logprobs
|
||||
if guided_choice:
|
||||
extra_body["guided_choice"] = guided_choice
|
||||
|
||||
params = await prepare_openai_completion_params(
|
||||
model=model_obj.provider_resource_id,
|
||||
prompt=prompt,
|
||||
best_of=best_of,
|
||||
echo=echo,
|
||||
frequency_penalty=frequency_penalty,
|
||||
logit_bias=logit_bias,
|
||||
logprobs=logprobs,
|
||||
max_tokens=max_tokens,
|
||||
n=n,
|
||||
presence_penalty=presence_penalty,
|
||||
seed=seed,
|
||||
stop=stop,
|
||||
stream=stream,
|
||||
stream_options=stream_options,
|
||||
temperature=temperature,
|
||||
top_p=top_p,
|
||||
user=user,
|
||||
extra_body=extra_body,
|
||||
)
|
||||
return await self.client.completions.create(**params) # type: ignore
|
||||
|
||||
async def openai_chat_completion(
|
||||
self,
|
||||
model: str,
|
||||
messages: list[OpenAIMessageParam],
|
||||
frequency_penalty: float | None = None,
|
||||
function_call: str | dict[str, Any] | None = None,
|
||||
functions: list[dict[str, Any]] | None = None,
|
||||
logit_bias: dict[str, float] | None = None,
|
||||
logprobs: bool | None = None,
|
||||
max_completion_tokens: int | None = None,
|
||||
max_tokens: int | None = None,
|
||||
n: int | None = None,
|
||||
parallel_tool_calls: bool | None = None,
|
||||
presence_penalty: float | None = None,
|
||||
response_format: OpenAIResponseFormatParam | None = None,
|
||||
seed: int | None = None,
|
||||
stop: str | list[str] | None = None,
|
||||
stream: bool | None = None,
|
||||
stream_options: dict[str, Any] | None = None,
|
||||
temperature: float | None = None,
|
||||
tool_choice: str | dict[str, Any] | None = None,
|
||||
tools: list[dict[str, Any]] | None = None,
|
||||
top_logprobs: int | None = None,
|
||||
top_p: float | None = None,
|
||||
user: str | None = None,
|
||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||
self._lazy_initialize_client()
|
||||
model_obj = await self._get_model(model)
|
||||
params = await prepare_openai_completion_params(
|
||||
model=model_obj.provider_resource_id,
|
||||
messages=messages,
|
||||
frequency_penalty=frequency_penalty,
|
||||
function_call=function_call,
|
||||
functions=functions,
|
||||
logit_bias=logit_bias,
|
||||
logprobs=logprobs,
|
||||
max_completion_tokens=max_completion_tokens,
|
||||
max_tokens=max_tokens,
|
||||
n=n,
|
||||
parallel_tool_calls=parallel_tool_calls,
|
||||
presence_penalty=presence_penalty,
|
||||
response_format=response_format,
|
||||
seed=seed,
|
||||
stop=stop,
|
||||
stream=stream,
|
||||
stream_options=stream_options,
|
||||
temperature=temperature,
|
||||
tool_choice=tool_choice,
|
||||
tools=tools,
|
||||
top_logprobs=top_logprobs,
|
||||
top_p=top_p,
|
||||
user=user,
|
||||
)
|
||||
return await self.client.chat.completions.create(**params) # type: ignore
|
||||
|
|
|
@ -3,6 +3,11 @@
|
|||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
import asyncio
|
||||
from typing import Any
|
||||
|
||||
from sqlalchemy.exc import IntegrityError
|
||||
|
||||
from llama_stack.apis.inference import (
|
||||
ListOpenAIChatCompletionResponse,
|
||||
OpenAIChatCompletion,
|
||||
|
@ -10,24 +15,43 @@ from llama_stack.apis.inference import (
|
|||
OpenAIMessageParam,
|
||||
Order,
|
||||
)
|
||||
from llama_stack.core.datatypes import AccessRule
|
||||
from llama_stack.core.utils.config_dirs import RUNTIME_BASE_DIR
|
||||
from llama_stack.core.datatypes import AccessRule, InferenceStoreConfig
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
from ..sqlstore.api import ColumnDefinition, ColumnType
|
||||
from ..sqlstore.authorized_sqlstore import AuthorizedSqlStore
|
||||
from ..sqlstore.sqlstore import SqliteSqlStoreConfig, SqlStoreConfig, sqlstore_impl
|
||||
from ..sqlstore.sqlstore import SqlStoreConfig, SqlStoreType, sqlstore_impl
|
||||
|
||||
logger = get_logger(name=__name__, category="inference_store")
|
||||
|
||||
|
||||
class InferenceStore:
|
||||
def __init__(self, sql_store_config: SqlStoreConfig, policy: list[AccessRule]):
|
||||
if not sql_store_config:
|
||||
sql_store_config = SqliteSqlStoreConfig(
|
||||
db_path=(RUNTIME_BASE_DIR / "sqlstore.db").as_posix(),
|
||||
def __init__(
|
||||
self,
|
||||
config: InferenceStoreConfig | SqlStoreConfig,
|
||||
policy: list[AccessRule],
|
||||
):
|
||||
# Handle backward compatibility
|
||||
if not isinstance(config, InferenceStoreConfig):
|
||||
# Legacy: SqlStoreConfig passed directly as config
|
||||
config = InferenceStoreConfig(
|
||||
sql_store_config=config,
|
||||
)
|
||||
self.sql_store_config = sql_store_config
|
||||
|
||||
self.config = config
|
||||
self.sql_store_config = config.sql_store_config
|
||||
self.sql_store = None
|
||||
self.policy = policy
|
||||
|
||||
# Disable write queue for SQLite to avoid concurrency issues
|
||||
self.enable_write_queue = self.sql_store_config.type != SqlStoreType.sqlite
|
||||
|
||||
# Async write queue and worker control
|
||||
self._queue: asyncio.Queue[tuple[OpenAIChatCompletion, list[OpenAIMessageParam]]] | None = None
|
||||
self._worker_tasks: list[asyncio.Task[Any]] = []
|
||||
self._max_write_queue_size: int = config.max_write_queue_size
|
||||
self._num_writers: int = max(1, config.num_writers)
|
||||
|
||||
async def initialize(self):
|
||||
"""Create the necessary tables if they don't exist."""
|
||||
self.sql_store = AuthorizedSqlStore(sqlstore_impl(self.sql_store_config))
|
||||
|
@ -42,23 +66,109 @@ class InferenceStore:
|
|||
},
|
||||
)
|
||||
|
||||
if self.enable_write_queue:
|
||||
self._queue = asyncio.Queue(maxsize=self._max_write_queue_size)
|
||||
for _ in range(self._num_writers):
|
||||
self._worker_tasks.append(asyncio.create_task(self._worker_loop()))
|
||||
else:
|
||||
logger.info("Write queue disabled for SQLite to avoid concurrency issues")
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
if not self._worker_tasks:
|
||||
return
|
||||
if self._queue is not None:
|
||||
await self._queue.join()
|
||||
for t in self._worker_tasks:
|
||||
if not t.done():
|
||||
t.cancel()
|
||||
for t in self._worker_tasks:
|
||||
try:
|
||||
await t
|
||||
except asyncio.CancelledError:
|
||||
pass
|
||||
self._worker_tasks.clear()
|
||||
|
||||
async def flush(self) -> None:
|
||||
"""Wait for all queued writes to complete. Useful for testing."""
|
||||
if self.enable_write_queue and self._queue is not None:
|
||||
await self._queue.join()
|
||||
|
||||
async def store_chat_completion(
|
||||
self, chat_completion: OpenAIChatCompletion, input_messages: list[OpenAIMessageParam]
|
||||
) -> None:
|
||||
if not self.sql_store:
|
||||
if self.enable_write_queue:
|
||||
if self._queue is None:
|
||||
raise ValueError("Inference store is not initialized")
|
||||
try:
|
||||
self._queue.put_nowait((chat_completion, input_messages))
|
||||
except asyncio.QueueFull:
|
||||
logger.warning(
|
||||
f"Write queue full; adding chat completion id={getattr(chat_completion, 'id', '<unknown>')}"
|
||||
)
|
||||
await self._queue.put((chat_completion, input_messages))
|
||||
else:
|
||||
await self._write_chat_completion(chat_completion, input_messages)
|
||||
|
||||
async def _worker_loop(self) -> None:
|
||||
assert self._queue is not None
|
||||
while True:
|
||||
try:
|
||||
item = await self._queue.get()
|
||||
except asyncio.CancelledError:
|
||||
break
|
||||
chat_completion, input_messages = item
|
||||
try:
|
||||
await self._write_chat_completion(chat_completion, input_messages)
|
||||
except Exception as e: # noqa: BLE001
|
||||
logger.error(f"Error writing chat completion: {e}")
|
||||
finally:
|
||||
self._queue.task_done()
|
||||
|
||||
async def _write_chat_completion(
|
||||
self, chat_completion: OpenAIChatCompletion, input_messages: list[OpenAIMessageParam]
|
||||
) -> None:
|
||||
if self.sql_store is None:
|
||||
raise ValueError("Inference store is not initialized")
|
||||
|
||||
data = chat_completion.model_dump()
|
||||
record_data = {
|
||||
"id": data["id"],
|
||||
"created": data["created"],
|
||||
"model": data["model"],
|
||||
"choices": data["choices"],
|
||||
"input_messages": [message.model_dump() for message in input_messages],
|
||||
}
|
||||
|
||||
await self.sql_store.insert(
|
||||
table="chat_completions",
|
||||
data={
|
||||
"id": data["id"],
|
||||
"created": data["created"],
|
||||
"model": data["model"],
|
||||
"choices": data["choices"],
|
||||
"input_messages": [message.model_dump() for message in input_messages],
|
||||
},
|
||||
try:
|
||||
await self.sql_store.insert(
|
||||
table="chat_completions",
|
||||
data=record_data,
|
||||
)
|
||||
except IntegrityError as e:
|
||||
# Duplicate chat completion IDs can be generated during tests especially if they are replaying
|
||||
# recorded responses across different tests. No need to warn or error under those circumstances.
|
||||
# In the wild, this is not likely to happen at all (no evidence) so we aren't really hiding any problem.
|
||||
|
||||
# Check if it's a unique constraint violation
|
||||
error_message = str(e.orig) if e.orig else str(e)
|
||||
if self._is_unique_constraint_error(error_message):
|
||||
# Update the existing record instead
|
||||
await self.sql_store.update(table="chat_completions", data=record_data, where={"id": data["id"]})
|
||||
else:
|
||||
# Re-raise if it's not a unique constraint error
|
||||
raise
|
||||
|
||||
def _is_unique_constraint_error(self, error_message: str) -> bool:
|
||||
"""Check if the error is specifically a unique constraint violation."""
|
||||
error_lower = error_message.lower()
|
||||
return any(
|
||||
indicator in error_lower
|
||||
for indicator in [
|
||||
"unique constraint failed", # SQLite
|
||||
"duplicate key", # PostgreSQL
|
||||
"unique violation", # PostgreSQL alternative
|
||||
"duplicate entry", # MySQL
|
||||
]
|
||||
)
|
||||
|
||||
async def list_chat_completions(
|
||||
|
|
|
@ -67,6 +67,17 @@ class OpenAIMixin(ABC):
|
|||
"""
|
||||
pass
|
||||
|
||||
def get_extra_client_params(self) -> dict[str, Any]:
|
||||
"""
|
||||
Get any extra parameters to pass to the AsyncOpenAI client.
|
||||
|
||||
Child classes can override this method to provide additional parameters
|
||||
such as timeout settings, proxies, etc.
|
||||
|
||||
:return: A dictionary of extra parameters
|
||||
"""
|
||||
return {}
|
||||
|
||||
@property
|
||||
def client(self) -> AsyncOpenAI:
|
||||
"""
|
||||
|
@ -78,6 +89,7 @@ class OpenAIMixin(ABC):
|
|||
return AsyncOpenAI(
|
||||
api_key=self.get_api_key(),
|
||||
base_url=self.get_base_url(),
|
||||
**self.get_extra_client_params(),
|
||||
)
|
||||
|
||||
async def _get_provider_model_id(self, model: str) -> str:
|
||||
|
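A hedged sketch of how an adapter would use the new `get_extra_client_params` hook above, mirroring the vLLM adapter change elsewhere in this diff; the class and values here are illustrative, not an existing provider:

```python
from typing import Any

import httpx

from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin


class ExampleAdapter(OpenAIMixin):
    def get_api_key(self) -> str:
        return "token-placeholder"  # illustrative

    def get_base_url(self) -> str:
        return "http://localhost:8000/v1"  # illustrative

    def get_extra_client_params(self) -> dict[str, Any]:
        # Forwarded verbatim into AsyncOpenAI(...) by the mixin's `client` property,
        # e.g. to control TLS verification the way the vLLM adapter does.
        return {"http_client": httpx.AsyncClient(verify=False)}
```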
@ -124,10 +136,15 @@ class OpenAIMixin(ABC):
|
|||
"""
|
||||
Direct OpenAI completion API call.
|
||||
"""
|
||||
if guided_choice is not None:
|
||||
logger.warning("guided_choice is not supported by the OpenAI API. Ignoring.")
|
||||
if prompt_logprobs is not None:
|
||||
logger.warning("prompt_logprobs is not supported by the OpenAI API. Ignoring.")
|
||||
# Handle parameters that are not supported by OpenAI API, but may be by the provider
|
||||
# prompt_logprobs is supported by vLLM
|
||||
# guided_choice is supported by vLLM
|
||||
# TODO: test coverage
|
||||
extra_body: dict[str, Any] = {}
|
||||
if prompt_logprobs is not None and prompt_logprobs >= 0:
|
||||
extra_body["prompt_logprobs"] = prompt_logprobs
|
||||
if guided_choice:
|
||||
extra_body["guided_choice"] = guided_choice
|
||||
|
||||
# TODO: fix openai_completion to return type compatible with OpenAI's API response
|
||||
return await self.client.completions.create( # type: ignore[no-any-return]
|
||||
|
@ -150,7 +167,8 @@ class OpenAIMixin(ABC):
|
|||
top_p=top_p,
|
||||
user=user,
|
||||
suffix=suffix,
|
||||
)
|
||||
),
|
||||
extra_body=extra_body,
|
||||
)
|
||||
|
||||
async def openai_chat_completion(
|
||||
|
|
|
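A hedged usage sketch of the `extra_body` pass-through above; whether `guided_choice`/`prompt_logprobs` are honored depends on the backing server (vLLM supports them), and the adapter and model names are placeholders:

```python
async def demo(adapter) -> None:
    completion = await adapter.openai_completion(
        model="my-model",  # placeholder
        prompt="The capital of France is",
        guided_choice=["Paris", "Lyon"],  # forwarded via extra_body
        prompt_logprobs=0,                # forwarded via extra_body
        max_tokens=5,
    )
    print(completion.choices[0].text)
```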
@ -172,6 +172,20 @@ class AuthorizedSqlStore:
|
|||
|
||||
return results.data[0] if results.data else None
|
||||
|
||||
async def update(self, table: str, data: Mapping[str, Any], where: Mapping[str, Any]) -> None:
|
||||
"""Update rows with automatic access control attribute capture."""
|
||||
enhanced_data = dict(data)
|
||||
|
||||
current_user = get_authenticated_user()
|
||||
if current_user:
|
||||
enhanced_data["owner_principal"] = current_user.principal
|
||||
enhanced_data["access_attributes"] = current_user.attributes
|
||||
else:
|
||||
enhanced_data["owner_principal"] = None
|
||||
enhanced_data["access_attributes"] = None
|
||||
|
||||
await self.sql_store.update(table, enhanced_data, where)
|
||||
|
||||
async def delete(self, table: str, where: Mapping[str, Any]) -> None:
|
||||
"""Delete rows with automatic access control filtering."""
|
||||
await self.sql_store.delete(table, where)
|
||||
|
|
|
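A hedged usage sketch of the new `update` method above, following the upsert pattern the inference store adopts for duplicate ids; the table name and record values are illustrative:

```python
from sqlalchemy.exc import IntegrityError


async def upsert_example(store) -> None:  # store: AuthorizedSqlStore
    record = {"id": "chatcmpl-123", "model": "example-model", "choices": []}
    try:
        await store.insert(table="chat_completions", data=record)
    except IntegrityError:
        # Fall back to updating the existing row; owner principal and access
        # attributes are re-captured automatically by update().
        await store.update(table="chat_completions", data=record, where={"id": record["id"]})
```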
@ -18,6 +18,7 @@ from functools import wraps
|
|||
from typing import Any
|
||||
|
||||
from llama_stack.apis.telemetry import (
|
||||
Event,
|
||||
LogSeverity,
|
||||
Span,
|
||||
SpanEndPayload,
|
||||
|
@ -98,7 +99,7 @@ class BackgroundLogger:
|
|||
def __init__(self, api: Telemetry, capacity: int = 100000):
|
||||
self.api = api
|
||||
self.log_queue: queue.Queue[Any] = queue.Queue(maxsize=capacity)
|
||||
self.worker_thread = threading.Thread(target=self._process_logs, daemon=True)
|
||||
self.worker_thread = threading.Thread(target=self._worker, daemon=True)
|
||||
self.worker_thread.start()
|
||||
self._last_queue_full_log_time: float = 0.0
|
||||
self._dropped_since_last_notice: int = 0
|
||||
|
@ -118,12 +119,16 @@ class BackgroundLogger:
|
|||
self._last_queue_full_log_time = current_time
|
||||
self._dropped_since_last_notice = 0
|
||||
|
||||
def _process_logs(self):
|
||||
def _worker(self):
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
loop.run_until_complete(self._process_logs())
|
||||
|
||||
async def _process_logs(self):
|
||||
while True:
|
||||
try:
|
||||
event = self.log_queue.get()
|
||||
# figure out how to use a thread's native loop
|
||||
asyncio.run(self.api.log_event(event))
|
||||
await self.api.log_event(event)
|
||||
except Exception:
|
||||
import traceback
|
||||
|
||||
|
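The change above gives the background logger thread its own long-lived event loop instead of calling `asyncio.run` per event. A generic, hedged sketch of that pattern with illustrative names:

```python
import asyncio
import queue
import threading


class QueueWorker:
    def __init__(self, handler, capacity: int = 100000):
        self.handler = handler  # async callable taking one event
        self.q: queue.Queue = queue.Queue(maxsize=capacity)
        threading.Thread(target=self._worker, daemon=True).start()

    def submit(self, event) -> None:
        # Called from any thread or loop; never awaits, so hot paths don't block.
        self.q.put_nowait(event)

    def _worker(self) -> None:
        # The daemon thread owns a single event loop for its whole lifetime.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        loop.run_until_complete(self._process())

    async def _process(self) -> None:
        while True:
            event = self.q.get()  # blocking get is fine: this loop runs only this coroutine
            try:
                await self.handler(event)
            finally:
                self.q.task_done()
```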
@ -136,6 +141,19 @@ class BackgroundLogger:
|
|||
self.log_queue.join()
|
||||
|
||||
|
||||
def enqueue_event(event: Event) -> None:
|
||||
"""Enqueue a telemetry event to the background logger if available.
|
||||
|
||||
This provides a non-blocking path for routers and other hot paths to
|
||||
submit telemetry without awaiting the Telemetry API, reducing contention
|
||||
with the main event loop.
|
||||
"""
|
||||
global BACKGROUND_LOGGER
|
||||
if BACKGROUND_LOGGER is None:
|
||||
raise RuntimeError("Telemetry API not initialized")
|
||||
BACKGROUND_LOGGER.log_event(event)
|
||||
|
||||
|
||||
class TraceContext:
|
||||
spans: list[Span] = []
|
||||
|
||||
|
@ -256,11 +274,7 @@ class TelemetryHandler(logging.Handler):
|
|||
if record.module in ("asyncio", "selector_events"):
|
||||
return
|
||||
|
||||
global CURRENT_TRACE_CONTEXT, BACKGROUND_LOGGER
|
||||
|
||||
if BACKGROUND_LOGGER is None:
|
||||
raise RuntimeError("Telemetry API not initialized")
|
||||
|
||||
global CURRENT_TRACE_CONTEXT
|
||||
context = CURRENT_TRACE_CONTEXT.get()
|
||||
if context is None:
|
||||
return
|
||||
|
@ -269,7 +283,7 @@ class TelemetryHandler(logging.Handler):
|
|||
if span is None:
|
||||
return
|
||||
|
||||
BACKGROUND_LOGGER.log_event(
|
||||
enqueue_event(
|
||||
UnstructuredLogEvent(
|
||||
trace_id=span.trace_id,
|
||||
span_id=span.span_id,
|
||||
|
|
|
@ -12,14 +12,12 @@ import uuid
|
|||
def generate_chunk_id(document_id: str, chunk_text: str, chunk_window: str | None = None) -> str:
|
||||
"""
|
||||
Generate a unique chunk ID using a hash of the document ID and chunk text.
|
||||
|
||||
Note: MD5 is used only to calculate an identifier, not for security purposes.
|
||||
Adding usedforsecurity=False for compatibility with FIPS environments.
|
||||
Then use the first 32 characters of the hash to create a UUID.
|
||||
"""
|
||||
hash_input = f"{document_id}:{chunk_text}".encode()
|
||||
if chunk_window:
|
||||
hash_input += f":{chunk_window}".encode()
|
||||
return str(uuid.UUID(hashlib.md5(hash_input, usedforsecurity=False).hexdigest()))
|
||||
return str(uuid.UUID(hashlib.sha256(hash_input).hexdigest()[:32]))
|
||||
|
||||
|
||||
def proper_case(s: str) -> str:
|
||||
|
|
|
@ -15,6 +15,8 @@ from enum import StrEnum
|
|||
from pathlib import Path
|
||||
from typing import Any, Literal, cast
|
||||
|
||||
from openai import NOT_GIVEN
|
||||
|
||||
from llama_stack.log import get_logger
|
||||
|
||||
logger = get_logger(__name__, category="testing")
|
||||
|
@ -105,8 +107,12 @@ def _deserialize_response(data: dict[str, Any]) -> Any:
|
|||
|
||||
return cls.model_validate(data["__data__"])
|
||||
except (ImportError, AttributeError, TypeError, ValueError) as e:
|
||||
logger.warning(f"Failed to deserialize object of type {data['__type__']}: {e}")
|
||||
return data["__data__"]
|
||||
logger.warning(f"Failed to deserialize object of type {data['__type__']} with model_validate: {e}")
|
||||
try:
|
||||
return cls.model_construct(**data["__data__"])
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to deserialize object of type {data['__type__']} with model_construct: {e}")
|
||||
return data["__data__"]
|
||||
|
||||
return data
|
||||
|
||||
|
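The deserialization path now degrades gracefully: strict model_validate first, then pydantic's model_construct (which skips validation), and only then the raw payload. A toy standalone illustration of why the middle step helps when a recorded payload no longer matches the current schema (illustrative model, not the real recording types):

    from pydantic import BaseModel

    class Usage(BaseModel):
        prompt_tokens: int
        completion_tokens: int

    payload = {"prompt_tokens": 13, "completion_tokens": None}  # stale recording with a missing value

    try:
        obj = Usage.model_validate(payload)      # strict: rejects the None
    except Exception:
        obj = Usage.model_construct(**payload)   # lenient: builds the object without validation

    print(obj.prompt_tokens)      # 13
    print(obj.completion_tokens)  # None, accepted because validation was skipped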
@@ -194,20 +200,15 @@ def _model_identifiers_digest(endpoint: str, response: dict[str, Any]) -> str:

        Supported endpoints:
        - '/api/tags' (Ollama): response body has 'models': [ { name/model/digest/id/... }, ... ]
-       - '/v1/models' (OpenAI): response body has 'data': [ { id: ... }, ... ]
+       - '/v1/models' (OpenAI): response body is: [ { id: ... }, ... ]
        Returns a list of unique identifiers or None if structure doesn't match.
        """
-       body = response["body"]
-       if endpoint == "/api/tags":
-           items = body.get("models")
-           idents = [m.model for m in items]
-       else:
-           items = body.get("data")
-           idents = [m.id for m in items]
+       items = response["body"]
+       idents = [m.model if endpoint == "/api/tags" else m.id for m in items]
        return sorted(set(idents))

    identifiers = _extract_model_identifiers()
-   return hashlib.sha1(("|".join(identifiers)).encode("utf-8")).hexdigest()[:8]
+   return hashlib.sha256(("|".join(identifiers)).encode("utf-8")).hexdigest()[:8]


def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]]) -> dict[str, Any] | None:
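Because the nested helper returns sorted(set(idents)), the digest does not depend on the order in which a provider lists its models, and sha256(...)[:8] just yields a short, stable fingerprint used to name recordings. A standalone sketch of that property:

    import hashlib

    def digest(identifiers: list[str]) -> str:
        idents = sorted(set(identifiers))
        return hashlib.sha256(("|".join(idents)).encode("utf-8")).hexdigest()[:8]

    assert digest(["llama3.2:3b", "all-minilm:l6-v2"]) == digest(["all-minilm:l6-v2", "llama3.2:3b"])
    print(digest(["llama3.2:3b", "all-minilm:l6-v2"]))  # 8-character hex prefix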
@@ -215,28 +216,22 @@ def _combine_model_list_responses(endpoint: str, records: list[dict[str, Any]])
    seen: dict[str, dict[str, Any]] = {}
    for rec in records:
        body = rec["response"]["body"]
        if endpoint == "/api/tags":
        items = body.models
        elif endpoint == "/v1/models":
        items = body.data
        else:
        items = []
        for m in items:
        if endpoint == "/v1/models":
        if endpoint == "/v1/models":
        for m in body:
        key = m.id
        else:
        seen[key] = m
        elif endpoint == "/api/tags":
        for m in body.models:
        key = m.model
        seen[key] = m
        seen[key] = m

    ordered = [seen[k] for k in sorted(seen.keys())]
    canonical = records[0]
    canonical_req = canonical.get("request", {})
    if isinstance(canonical_req, dict):
        canonical_req["endpoint"] = endpoint
    if endpoint == "/v1/models":
        body = {"data": ordered, "object": "list"}
    else:
        body = ordered
    if endpoint == "/api/tags":
        from ollama import ListResponse

        body = ListResponse(models=ordered)
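Whichever endpoint shape is in play, the combining logic reduces to: dedupe by a per-item key with last-record-wins, then emit the survivors in sorted key order so the combined recording is deterministic. In isolation (plain dicts standing in for the SDK model objects):

    seen = {}
    for m in [{"id": "b-model"}, {"id": "a-model"}, {"id": "a-model"}]:
        seen[m["id"]] = m  # last occurrence of each identifier wins
    ordered = [seen[k] for k in sorted(seen)]
    assert [m["id"] for m in ordered] == ["a-model", "b-model"]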
@@ -247,12 +242,17 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
    global _current_mode, _current_storage

    if _current_mode == InferenceMode.LIVE or _current_storage is None:
        # Normal operation
-       return await original_method(self, *args, **kwargs)
+       if endpoint == "/v1/models":
+           return original_method(self, *args, **kwargs)
+       else:
+           return await original_method(self, *args, **kwargs)

    # Get base URL based on client type
    if client_type == "openai":
        base_url = str(self._client.base_url)

+       # the OpenAI client methods may pass NOT_GIVEN for unset parameters; filter these out
+       kwargs = {k: v for k, v in kwargs.items() if v is not NOT_GIVEN}
    elif client_type == "ollama":
        # Get base URL from the client (Ollama client uses host attribute)
        base_url = getattr(self, "host", "http://localhost:11434")
@@ -296,7 +296,14 @@ async def _patched_inference_method(original_method, self, client_type, endpoint
        )

    elif _current_mode == InferenceMode.RECORD:
-       response = await original_method(self, *args, **kwargs)
+       if endpoint == "/v1/models":
+           response = original_method(self, *args, **kwargs)
+       else:
+           response = await original_method(self, *args, **kwargs)
+
+       # we want to store the result of the iterator, not the iterator itself
+       if endpoint == "/v1/models":
+           response = [m async for m in response]

        request_data = {
            "method": method,
@@ -376,10 +383,14 @@ def patch_inference_clients():
            _original_methods["embeddings_create"], self, "openai", "/v1/embeddings", *args, **kwargs
        )

-   async def patched_models_list(self, *args, **kwargs):
-       return await _patched_inference_method(
-           _original_methods["models_list"], self, "openai", "/v1/models", *args, **kwargs
-       )
+   def patched_models_list(self, *args, **kwargs):
+       async def _iter():
+           for item in await _patched_inference_method(
+               _original_methods["models_list"], self, "openai", "/v1/models", *args, **kwargs
+           ):
+               yield item
+
+       return _iter()

    # Apply OpenAI patches
    AsyncChatCompletions.create = patched_chat_completions_create
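The reworked patched_models_list has to hand back something callers can iterate with "async for" right away (the async OpenAI client's models.list() behaves like an async paginator and is not awaited at the call site), so it wraps the awaited record/replay result in a small async generator. The shape of that trick in isolation (standalone sketch, not the project code):

    import asyncio

    async def fetch_items() -> list[str]:
        # stand-in for the awaited replay result
        return ["gpt-5-mini", "text-embedding-3-small"]

    def list_items():
        # Synchronous entry point that still supports `async for`,
        # because calling it returns an async generator object.
        async def _iter():
            for item in await fetch_items():
                yield item
        return _iter()

    async def main():
        async for model in list_items():
            print(model)

    asyncio.run(main())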
467 llama_stack/ui/package-lock.json (generated)
@ -11,7 +11,7 @@
|
|||
"@radix-ui/react-collapsible": "^1.1.12",
|
||||
"@radix-ui/react-dialog": "^1.1.13",
|
||||
"@radix-ui/react-dropdown-menu": "^2.1.16",
|
||||
"@radix-ui/react-select": "^2.2.5",
|
||||
"@radix-ui/react-select": "^2.2.6",
|
||||
"@radix-ui/react-separator": "^1.1.7",
|
||||
"@radix-ui/react-slot": "^1.2.3",
|
||||
"@radix-ui/react-tooltip": "^1.2.8",
|
||||
|
@ -20,7 +20,7 @@
|
|||
"framer-motion": "^12.23.12",
|
||||
"llama-stack-client": "^0.2.21",
|
||||
"lucide-react": "^0.542.0",
|
||||
"next": "15.3.3",
|
||||
"next": "15.5.3",
|
||||
"next-auth": "^4.24.11",
|
||||
"next-themes": "^0.4.6",
|
||||
"react": "^19.0.0",
|
||||
|
@ -664,9 +664,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@emnapi/runtime": {
|
||||
"version": "1.4.3",
|
||||
"resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.4.3.tgz",
|
||||
"integrity": "sha512-pBPWdu6MLKROBX05wSNKcNb++m5Er+KQ9QkB+WVM+pW2Kx9hoSrVTnu3BdkI5eBLZoKu/J6mW/B6i6bJB2ytXQ==",
|
||||
"version": "1.5.0",
|
||||
"resolved": "https://registry.npmjs.org/@emnapi/runtime/-/runtime-1.5.0.tgz",
|
||||
"integrity": "sha512-97/BJ3iXHww3djw6hYIfErCZFee7qCtrneuLa20UXFCOTCfBM2cvQHjWJ2EG0s0MtdNwInarqCTz35i4wWXHsQ==",
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
|
@ -927,9 +927,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@img/sharp-darwin-arm64": {
|
||||
"version": "0.34.1",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.34.1.tgz",
|
||||
"integrity": "sha512-pn44xgBtgpEbZsu+lWf2KNb6OAf70X68k+yk69Ic2Xz11zHR/w24/U49XT7AeRwJ0Px+mhALhU5LPci1Aymk7A==",
|
||||
"version": "0.34.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.34.3.tgz",
|
||||
"integrity": "sha512-ryFMfvxxpQRsgZJqBd4wsttYQbCxsJksrv9Lw/v798JcQ8+w84mBWuXwl+TT0WJ/WrYOLaYpwQXi3sA9nTIaIg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
|
@ -945,13 +945,13 @@
|
|||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-darwin-arm64": "1.1.0"
|
||||
"@img/sharp-libvips-darwin-arm64": "1.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-darwin-x64": {
|
||||
"version": "0.34.1",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.1.tgz",
|
||||
"integrity": "sha512-VfuYgG2r8BpYiOUN+BfYeFo69nP/MIwAtSJ7/Zpxc5QF3KS22z8Pvg3FkrSFJBPNQ7mmcUcYQFBmEQp7eu1F8Q==",
|
||||
"version": "0.34.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.34.3.tgz",
|
||||
"integrity": "sha512-yHpJYynROAj12TA6qil58hmPmAwxKKC7reUqtGLzsOHfP7/rniNGTL8tjWX6L3CTV4+5P4ypcS7Pp+7OB+8ihA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
|
@ -967,13 +967,13 @@
|
|||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-darwin-x64": "1.1.0"
|
||||
"@img/sharp-libvips-darwin-x64": "1.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-libvips-darwin-arm64": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.1.0.tgz",
|
||||
"integrity": "sha512-HZ/JUmPwrJSoM4DIQPv/BfNh9yrOA8tlBbqbLz4JZ5uew2+o22Ik+tHQJcih7QJuSa0zo5coHTfD5J8inqj9DA==",
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.2.0.tgz",
|
||||
"integrity": "sha512-sBZmpwmxqwlqG9ueWFXtockhsxefaV6O84BMOrhtg/YqbTaRdqDE7hxraVE3y6gVM4eExmfzW4a8el9ArLeEiQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
|
@ -987,9 +987,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@img/sharp-libvips-darwin-x64": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.1.0.tgz",
|
||||
"integrity": "sha512-Xzc2ToEmHN+hfvsl9wja0RlnXEgpKNmftriQp6XzY/RaSfwD9th+MSh0WQKzUreLKKINb3afirxW7A0fz2YWuQ==",
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.2.0.tgz",
|
||||
"integrity": "sha512-M64XVuL94OgiNHa5/m2YvEQI5q2cl9d/wk0qFTDVXcYzi43lxuiFTftMR1tOnFQovVXNZJ5TURSDK2pNe9Yzqg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
|
@ -1003,9 +1003,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@img/sharp-libvips-linux-arm": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.1.0.tgz",
|
||||
"integrity": "sha512-s8BAd0lwUIvYCJyRdFqvsj+BJIpDBSxs6ivrOPm/R7piTs5UIwY5OjXrP2bqXC9/moGsyRa37eYWYCOGVXxVrA==",
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.2.0.tgz",
|
||||
"integrity": "sha512-mWd2uWvDtL/nvIzThLq3fr2nnGfyr/XMXlq8ZJ9WMR6PXijHlC3ksp0IpuhK6bougvQrchUAfzRLnbsen0Cqvw==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
|
@ -1019,9 +1019,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@img/sharp-libvips-linux-arm64": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.1.0.tgz",
|
||||
"integrity": "sha512-IVfGJa7gjChDET1dK9SekxFFdflarnUB8PwW8aGwEoF3oAsSDuNUTYS+SKDOyOJxQyDC1aPFMuRYLoDInyV9Ew==",
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.2.0.tgz",
|
||||
"integrity": "sha512-RXwd0CgG+uPRX5YYrkzKyalt2OJYRiJQ8ED/fi1tq9WQW2jsQIn0tqrlR5l5dr/rjqq6AHAxURhj2DVjyQWSOA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
|
@ -1035,9 +1035,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@img/sharp-libvips-linux-ppc64": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.1.0.tgz",
|
||||
"integrity": "sha512-tiXxFZFbhnkWE2LA8oQj7KYR+bWBkiV2nilRldT7bqoEZ4HiDOcePr9wVDAZPi/Id5fT1oY9iGnDq20cwUz8lQ==",
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-ppc64/-/sharp-libvips-linux-ppc64-1.2.0.tgz",
|
||||
"integrity": "sha512-Xod/7KaDDHkYu2phxxfeEPXfVXFKx70EAFZ0qyUdOjCcxbjqyJOEUpDe6RIyaunGxT34Anf9ue/wuWOqBW2WcQ==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
|
@ -1051,9 +1051,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@img/sharp-libvips-linux-s390x": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.1.0.tgz",
|
||||
"integrity": "sha512-xukSwvhguw7COyzvmjydRb3x/09+21HykyapcZchiCUkTThEQEOMtBj9UhkaBRLuBrgLFzQ2wbxdeCCJW/jgJA==",
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.2.0.tgz",
|
||||
"integrity": "sha512-eMKfzDxLGT8mnmPJTNMcjfO33fLiTDsrMlUVcp6b96ETbnJmd4uvZxVJSKPQfS+odwfVaGifhsB07J1LynFehw==",
|
||||
"cpu": [
|
||||
"s390x"
|
||||
],
|
||||
|
@ -1067,9 +1067,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@img/sharp-libvips-linux-x64": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.1.0.tgz",
|
||||
"integrity": "sha512-yRj2+reB8iMg9W5sULM3S74jVS7zqSzHG3Ol/twnAAkAhnGQnpjj6e4ayUz7V+FpKypwgs82xbRdYtchTTUB+Q==",
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.2.0.tgz",
|
||||
"integrity": "sha512-ZW3FPWIc7K1sH9E3nxIGB3y3dZkpJlMnkk7z5tu1nSkBoCgw2nSRTFHI5pB/3CQaJM0pdzMF3paf9ckKMSE9Tg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
|
@ -1083,9 +1083,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@img/sharp-libvips-linuxmusl-arm64": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.1.0.tgz",
|
||||
"integrity": "sha512-jYZdG+whg0MDK+q2COKbYidaqW/WTz0cc1E+tMAusiDygrM4ypmSCjOJPmFTvHHJ8j/6cAGyeDWZOsK06tP33w==",
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.2.0.tgz",
|
||||
"integrity": "sha512-UG+LqQJbf5VJ8NWJ5Z3tdIe/HXjuIdo4JeVNADXBFuG7z9zjoegpzzGIyV5zQKi4zaJjnAd2+g2nna8TZvuW9Q==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
|
@ -1099,9 +1099,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@img/sharp-libvips-linuxmusl-x64": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.1.0.tgz",
|
||||
"integrity": "sha512-wK7SBdwrAiycjXdkPnGCPLjYb9lD4l6Ze2gSdAGVZrEL05AOUJESWU2lhlC+Ffn5/G+VKuSm6zzbQSzFX/P65A==",
|
||||
"version": "1.2.0",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.2.0.tgz",
|
||||
"integrity": "sha512-SRYOLR7CXPgNze8akZwjoGBoN1ThNZoqpOgfnOxmWsklTGVfJiGJoC/Lod7aNMGA1jSsKWM1+HRX43OP6p9+6Q==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
|
@ -1115,9 +1115,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@img/sharp-linux-arm": {
|
||||
"version": "0.34.1",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.1.tgz",
|
||||
"integrity": "sha512-anKiszvACti2sGy9CirTlNyk7BjjZPiML1jt2ZkTdcvpLU1YH6CXwRAZCA2UmRXnhiIftXQ7+Oh62Ji25W72jA==",
|
||||
"version": "0.34.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-arm/-/sharp-linux-arm-0.34.3.tgz",
|
||||
"integrity": "sha512-oBK9l+h6KBN0i3dC8rYntLiVfW8D8wH+NPNT3O/WBHeW0OQWCjfWksLUaPidsrDKpJgXp3G3/hkmhptAW0I3+A==",
|
||||
"cpu": [
|
||||
"arm"
|
||||
],
|
||||
|
@ -1133,13 +1133,13 @@
|
|||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linux-arm": "1.1.0"
|
||||
"@img/sharp-libvips-linux-arm": "1.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-linux-arm64": {
|
||||
"version": "0.34.1",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.1.tgz",
|
||||
"integrity": "sha512-kX2c+vbvaXC6vly1RDf/IWNXxrlxLNpBVWkdpRq5Ka7OOKj6nr66etKy2IENf6FtOgklkg9ZdGpEu9kwdlcwOQ==",
|
||||
"version": "0.34.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.34.3.tgz",
|
||||
"integrity": "sha512-QdrKe3EvQrqwkDrtuTIjI0bu6YEJHTgEeqdzI3uWJOH6G1O8Nl1iEeVYRGdj1h5I21CqxSvQp1Yv7xeU3ZewbA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
|
@ -1155,13 +1155,35 @@
|
|||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linux-arm64": "1.1.0"
|
||||
"@img/sharp-libvips-linux-arm64": "1.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-linux-ppc64": {
|
||||
"version": "0.34.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-ppc64/-/sharp-linux-ppc64-0.34.3.tgz",
|
||||
"integrity": "sha512-GLtbLQMCNC5nxuImPR2+RgrviwKwVql28FWZIW1zWruy6zLgA5/x2ZXk3mxj58X/tszVF69KK0Is83V8YgWhLA==",
|
||||
"cpu": [
|
||||
"ppc64"
|
||||
],
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linux-ppc64": "1.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-linux-s390x": {
|
||||
"version": "0.34.1",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.1.tgz",
|
||||
"integrity": "sha512-7s0KX2tI9mZI2buRipKIw2X1ufdTeaRgwmRabt5bi9chYfhur+/C1OXg3TKg/eag1W+6CCWLVmSauV1owmRPxA==",
|
||||
"version": "0.34.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.34.3.tgz",
|
||||
"integrity": "sha512-3gahT+A6c4cdc2edhsLHmIOXMb17ltffJlxR0aC2VPZfwKoTGZec6u5GrFgdR7ciJSsHT27BD3TIuGcuRT0KmQ==",
|
||||
"cpu": [
|
||||
"s390x"
|
||||
],
|
||||
|
@ -1177,13 +1199,13 @@
|
|||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linux-s390x": "1.1.0"
|
||||
"@img/sharp-libvips-linux-s390x": "1.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-linux-x64": {
|
||||
"version": "0.34.1",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.1.tgz",
|
||||
"integrity": "sha512-wExv7SH9nmoBW3Wr2gvQopX1k8q2g5V5Iag8Zk6AVENsjwd+3adjwxtp3Dcu2QhOXr8W9NusBU6XcQUohBZ5MA==",
|
||||
"version": "0.34.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linux-x64/-/sharp-linux-x64-0.34.3.tgz",
|
||||
"integrity": "sha512-8kYso8d806ypnSq3/Ly0QEw90V5ZoHh10yH0HnrzOCr6DKAPI6QVHvwleqMkVQ0m+fc7EH8ah0BB0QPuWY6zJQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
|
@ -1199,13 +1221,13 @@
|
|||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linux-x64": "1.1.0"
|
||||
"@img/sharp-libvips-linux-x64": "1.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-linuxmusl-arm64": {
|
||||
"version": "0.34.1",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.1.tgz",
|
||||
"integrity": "sha512-DfvyxzHxw4WGdPiTF0SOHnm11Xv4aQexvqhRDAoD00MzHekAj9a/jADXeXYCDFH/DzYruwHbXU7uz+H+nWmSOQ==",
|
||||
"version": "0.34.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.34.3.tgz",
|
||||
"integrity": "sha512-vAjbHDlr4izEiXM1OTggpCcPg9tn4YriK5vAjowJsHwdBIdx0fYRsURkxLG2RLm9gyBq66gwtWI8Gx0/ov+JKQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
|
@ -1221,13 +1243,13 @@
|
|||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linuxmusl-arm64": "1.1.0"
|
||||
"@img/sharp-libvips-linuxmusl-arm64": "1.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-linuxmusl-x64": {
|
||||
"version": "0.34.1",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.1.tgz",
|
||||
"integrity": "sha512-pax/kTR407vNb9qaSIiWVnQplPcGU8LRIJpDT5o8PdAx5aAA7AS3X9PS8Isw1/WfqgQorPotjrZL3Pqh6C5EBg==",
|
||||
"version": "0.34.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.34.3.tgz",
|
||||
"integrity": "sha512-gCWUn9547K5bwvOn9l5XGAEjVTTRji4aPTqLzGXHvIr6bIDZKNTA34seMPgM0WmSf+RYBH411VavCejp3PkOeQ==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
|
@ -1243,20 +1265,20 @@
|
|||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-libvips-linuxmusl-x64": "1.1.0"
|
||||
"@img/sharp-libvips-linuxmusl-x64": "1.2.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-wasm32": {
|
||||
"version": "0.34.1",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.1.tgz",
|
||||
"integrity": "sha512-YDybQnYrLQfEpzGOQe7OKcyLUCML4YOXl428gOOzBgN6Gw0rv8dpsJ7PqTHxBnXnwXr8S1mYFSLSa727tpz0xg==",
|
||||
"version": "0.34.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-wasm32/-/sharp-wasm32-0.34.3.tgz",
|
||||
"integrity": "sha512-+CyRcpagHMGteySaWos8IbnXcHgfDn7pO2fiC2slJxvNq9gDipYBN42/RagzctVRKgxATmfqOSulgZv5e1RdMg==",
|
||||
"cpu": [
|
||||
"wasm32"
|
||||
],
|
||||
"license": "Apache-2.0 AND LGPL-3.0-or-later AND MIT",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"@emnapi/runtime": "^1.4.0"
|
||||
"@emnapi/runtime": "^1.4.4"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
|
||||
|
@ -1265,10 +1287,29 @@
|
|||
"url": "https://opencollective.com/libvips"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-win32-arm64": {
|
||||
"version": "0.34.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-win32-arm64/-/sharp-win32-arm64-0.34.3.tgz",
|
||||
"integrity": "sha512-MjnHPnbqMXNC2UgeLJtX4XqoVHHlZNd+nPt1kRPmj63wURegwBhZlApELdtxM2OIZDRv/DFtLcNhVbd1z8GYXQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "Apache-2.0 AND LGPL-3.0-or-later",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
],
|
||||
"engines": {
|
||||
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://opencollective.com/libvips"
|
||||
}
|
||||
},
|
||||
"node_modules/@img/sharp-win32-ia32": {
|
||||
"version": "0.34.1",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.1.tgz",
|
||||
"integrity": "sha512-WKf/NAZITnonBf3U1LfdjoMgNO5JYRSlhovhRhMxXVdvWYveM4kM3L8m35onYIdh75cOMCo1BexgVQcCDzyoWw==",
|
||||
"version": "0.34.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.34.3.tgz",
|
||||
"integrity": "sha512-xuCdhH44WxuXgOM714hn4amodJMZl3OEvf0GVTm0BEyMeA2to+8HEdRPShH0SLYptJY1uBw+SCFP9WVQi1Q/cw==",
|
||||
"cpu": [
|
||||
"ia32"
|
||||
],
|
||||
|
@ -1285,9 +1326,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@img/sharp-win32-x64": {
|
||||
"version": "0.34.1",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.1.tgz",
|
||||
"integrity": "sha512-hw1iIAHpNE8q3uMIRCgGOeDoz9KtFNarFLQclLxr/LK1VBkj8nby18RjFvr6aP7USRYAjTZW6yisnBWMX571Tw==",
|
||||
"version": "0.34.3",
|
||||
"resolved": "https://registry.npmjs.org/@img/sharp-win32-x64/-/sharp-win32-x64-0.34.3.tgz",
|
||||
"integrity": "sha512-OWwz05d++TxzLEv4VnsTz5CmZ6mI6S05sfQGEMrNrQcOEERbX46332IvE7pO/EUiw7jUrrS40z/M7kPyjfl04g==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
|
@ -1849,9 +1890,10 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@next/env": {
|
||||
"version": "15.3.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-15.3.3.tgz",
|
||||
"integrity": "sha512-OdiMrzCl2Xi0VTjiQQUK0Xh7bJHnOuET2s+3V+Y40WJBAXrJeGA3f+I8MZJ/YQ3mVGi5XGR1L66oFlgqXhQ4Vw=="
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/env/-/env-15.5.3.tgz",
|
||||
"integrity": "sha512-RSEDTRqyihYXygx/OJXwvVupfr9m04+0vH8vyy0HfZ7keRto6VX9BbEk0J2PUk0VGy6YhklJUSrgForov5F9pw==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@next/eslint-plugin-next": {
|
||||
"version": "15.5.2",
|
||||
|
@ -1864,12 +1906,13 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-arm64": {
|
||||
"version": "15.3.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-15.3.3.tgz",
|
||||
"integrity": "sha512-WRJERLuH+O3oYB4yZNVahSVFmtxRNjNF1I1c34tYMoJb0Pve+7/RaLAJJizyYiFhjYNGHRAE1Ri2Fd23zgDqhg==",
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-arm64/-/swc-darwin-arm64-15.5.3.tgz",
|
||||
"integrity": "sha512-nzbHQo69+au9wJkGKTU9lP7PXv0d1J5ljFpvb+LnEomLtSbJkbZyEs6sbF3plQmiOB2l9OBtN2tNSvCH1nQ9Jg==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
|
@ -1879,12 +1922,13 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@next/swc-darwin-x64": {
|
||||
"version": "15.3.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-15.3.3.tgz",
|
||||
"integrity": "sha512-XHdzH/yBc55lu78k/XwtuFR/ZXUTcflpRXcsu0nKmF45U96jt1tsOZhVrn5YH+paw66zOANpOnFQ9i6/j+UYvw==",
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-15.5.3.tgz",
|
||||
"integrity": "sha512-w83w4SkOOhekJOcA5HBvHyGzgV1W/XvOfpkrxIse4uPWhYTTRwtGEM4v/jiXwNSJvfRvah0H8/uTLBKRXlef8g==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"darwin"
|
||||
|
@ -1894,12 +1938,13 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-gnu": {
|
||||
"version": "15.3.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-15.3.3.tgz",
|
||||
"integrity": "sha512-VZ3sYL2LXB8znNGcjhocikEkag/8xiLgnvQts41tq6i+wql63SMS1Q6N8RVXHw5pEUjiof+II3HkDd7GFcgkzw==",
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-15.5.3.tgz",
|
||||
"integrity": "sha512-+m7pfIs0/yvgVu26ieaKrifV8C8yiLe7jVp9SpcIzg7XmyyNE7toC1fy5IOQozmr6kWl/JONC51osih2RyoXRw==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
|
@ -1909,12 +1954,13 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-arm64-musl": {
|
||||
"version": "15.3.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-15.3.3.tgz",
|
||||
"integrity": "sha512-h6Y1fLU4RWAp1HPNJWDYBQ+e3G7sLckyBXhmH9ajn8l/RSMnhbuPBV/fXmy3muMcVwoJdHL+UtzRzs0nXOf9SA==",
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-15.5.3.tgz",
|
||||
"integrity": "sha512-u3PEIzuguSenoZviZJahNLgCexGFhso5mxWCrrIMdvpZn6lkME5vc/ADZG8UUk5K1uWRy4hqSFECrON6UKQBbQ==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
|
@ -1924,12 +1970,13 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-gnu": {
|
||||
"version": "15.3.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-15.3.3.tgz",
|
||||
"integrity": "sha512-jJ8HRiF3N8Zw6hGlytCj5BiHyG/K+fnTKVDEKvUCyiQ/0r5tgwO7OgaRiOjjRoIx2vwLR+Rz8hQoPrnmFbJdfw==",
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-15.5.3.tgz",
|
||||
"integrity": "sha512-lDtOOScYDZxI2BENN9m0pfVPJDSuUkAD1YXSvlJF0DKwZt0WlA7T7o3wrcEr4Q+iHYGzEaVuZcsIbCps4K27sA==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
|
@ -1939,12 +1986,13 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@next/swc-linux-x64-musl": {
|
||||
"version": "15.3.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-15.3.3.tgz",
|
||||
"integrity": "sha512-HrUcTr4N+RgiiGn3jjeT6Oo208UT/7BuTr7K0mdKRBtTbT4v9zJqCDKO97DUqqoBK1qyzP1RwvrWTvU6EPh/Cw==",
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-15.5.3.tgz",
|
||||
"integrity": "sha512-9vWVUnsx9PrY2NwdVRJ4dUURAQ8Su0sLRPqcCCxtX5zIQUBES12eRVHq6b70bbfaVaxIDGJN2afHui0eDm+cLg==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"linux"
|
||||
|
@ -1954,12 +2002,13 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-arm64-msvc": {
|
||||
"version": "15.3.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-15.3.3.tgz",
|
||||
"integrity": "sha512-SxorONgi6K7ZUysMtRF3mIeHC5aA3IQLmKFQzU0OuhuUYwpOBc1ypaLJLP5Bf3M9k53KUUUj4vTPwzGvl/NwlQ==",
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-15.5.3.tgz",
|
||||
"integrity": "sha512-1CU20FZzY9LFQigRi6jM45oJMU3KziA5/sSG+dXeVaTm661snQP6xu3ykGxxwU5sLG3sh14teO/IOEPVsQMRfA==",
|
||||
"cpu": [
|
||||
"arm64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
|
@ -1969,12 +2018,13 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@next/swc-win32-x64-msvc": {
|
||||
"version": "15.3.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-15.3.3.tgz",
|
||||
"integrity": "sha512-4QZG6F8enl9/S2+yIiOiju0iCTFd93d8VC1q9LZS4p/Xuk81W2QDjCFeoogmrWWkAD59z8ZxepBQap2dKS5ruw==",
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-15.5.3.tgz",
|
||||
"integrity": "sha512-JMoLAq3n3y5tKXPQwCK5c+6tmwkuFDa2XAxz8Wm4+IVthdBZdZGh+lmiLUHg9f9IDwIQpUjp+ysd6OkYTyZRZw==",
|
||||
"cpu": [
|
||||
"x64"
|
||||
],
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"os": [
|
||||
"win32"
|
||||
|
@ -2874,22 +2924,22 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-select": {
|
||||
"version": "2.2.5",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-select/-/react-select-2.2.5.tgz",
|
||||
"integrity": "sha512-HnMTdXEVuuyzx63ME0ut4+sEMYW6oouHWNGUZc7ddvUWIcfCva/AMoqEW/3wnEllriMWBa0RHspCYnfCWJQYmA==",
|
||||
"version": "2.2.6",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-select/-/react-select-2.2.6.tgz",
|
||||
"integrity": "sha512-I30RydO+bnn2PQztvo25tswPH+wFBjehVGtmagkU78yMdwTwVf12wnAOF+AeP8S2N8xD+5UPbGhkUfPyvT+mwQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@radix-ui/number": "1.1.1",
|
||||
"@radix-ui/primitive": "1.1.2",
|
||||
"@radix-ui/primitive": "1.1.3",
|
||||
"@radix-ui/react-collection": "1.1.7",
|
||||
"@radix-ui/react-compose-refs": "1.1.2",
|
||||
"@radix-ui/react-context": "1.1.2",
|
||||
"@radix-ui/react-direction": "1.1.1",
|
||||
"@radix-ui/react-dismissable-layer": "1.1.10",
|
||||
"@radix-ui/react-focus-guards": "1.1.2",
|
||||
"@radix-ui/react-dismissable-layer": "1.1.11",
|
||||
"@radix-ui/react-focus-guards": "1.1.3",
|
||||
"@radix-ui/react-focus-scope": "1.1.7",
|
||||
"@radix-ui/react-id": "1.1.1",
|
||||
"@radix-ui/react-popper": "1.2.7",
|
||||
"@radix-ui/react-popper": "1.2.8",
|
||||
"@radix-ui/react-portal": "1.1.9",
|
||||
"@radix-ui/react-primitive": "2.1.3",
|
||||
"@radix-ui/react-slot": "1.2.3",
|
||||
|
@ -2916,13 +2966,19 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-select/node_modules/@radix-ui/primitive": {
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/primitive/-/primitive-1.1.3.tgz",
|
||||
"integrity": "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-dismissable-layer": {
|
||||
"version": "1.1.10",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.10.tgz",
|
||||
"integrity": "sha512-IM1zzRV4W3HtVgftdQiiOmA0AdJlCtMLe00FXaHwgt3rAnNsIyDqshvkIW3hj/iu5hu8ERP7KIYki6NkqDxAwQ==",
|
||||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-dismissable-layer/-/react-dismissable-layer-1.1.11.tgz",
|
||||
"integrity": "sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@radix-ui/primitive": "1.1.2",
|
||||
"@radix-ui/primitive": "1.1.3",
|
||||
"@radix-ui/react-compose-refs": "1.1.2",
|
||||
"@radix-ui/react-primitive": "2.1.3",
|
||||
"@radix-ui/react-use-callback-ref": "1.1.1",
|
||||
|
@ -2943,6 +2999,21 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-focus-guards": {
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-focus-guards/-/react-focus-guards-1.1.3.tgz",
|
||||
"integrity": "sha512-0rFg/Rj2Q62NCm62jZw0QX7a3sz6QCQU0LpZdNrJX8byRGaGVTqbrW9jAoIAHyMQqsNpeZ81YgSizOt5WXq0Pw==",
|
||||
"license": "MIT",
|
||||
"peerDependencies": {
|
||||
"@types/react": "*",
|
||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-focus-scope": {
|
||||
"version": "1.1.7",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-focus-scope/-/react-focus-scope-1.1.7.tgz",
|
||||
|
@ -2968,38 +3039,6 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-popper": {
|
||||
"version": "1.2.7",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-popper/-/react-popper-1.2.7.tgz",
|
||||
"integrity": "sha512-IUFAccz1JyKcf/RjB552PlWwxjeCJB8/4KxT7EhBHOJM+mN7LdW+B3kacJXILm32xawcMMjb2i0cIZpo+f9kiQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@floating-ui/react-dom": "^2.0.0",
|
||||
"@radix-ui/react-arrow": "1.1.7",
|
||||
"@radix-ui/react-compose-refs": "1.1.2",
|
||||
"@radix-ui/react-context": "1.1.2",
|
||||
"@radix-ui/react-primitive": "2.1.3",
|
||||
"@radix-ui/react-use-callback-ref": "1.1.1",
|
||||
"@radix-ui/react-use-layout-effect": "1.1.1",
|
||||
"@radix-ui/react-use-rect": "1.1.1",
|
||||
"@radix-ui/react-use-size": "1.1.1",
|
||||
"@radix-ui/rect": "1.1.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@types/react": "*",
|
||||
"@types/react-dom": "*",
|
||||
"react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc",
|
||||
"react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@types/react": {
|
||||
"optional": true
|
||||
},
|
||||
"@types/react-dom": {
|
||||
"optional": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"node_modules/@radix-ui/react-select/node_modules/@radix-ui/react-portal": {
|
||||
"version": "1.1.9",
|
||||
"resolved": "https://registry.npmjs.org/@radix-ui/react-portal/-/react-portal-1.1.9.tgz",
|
||||
|
@ -3547,12 +3586,6 @@
|
|||
"@sinonjs/commons": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/@swc/counter": {
|
||||
"version": "0.1.3",
|
||||
"resolved": "https://registry.npmjs.org/@swc/counter/-/counter-0.1.3.tgz",
|
||||
"integrity": "sha512-e2BR4lsJkkRlKZ/qCHPw9ZaSxc0MVUd7gtbtaB7aMvHeJVYe8sOB8DBZkP2DtISHGSku9sCK6T6cnY0CtXrOCQ==",
|
||||
"license": "Apache-2.0"
|
||||
},
|
||||
"node_modules/@swc/helpers": {
|
||||
"version": "0.5.15",
|
||||
"resolved": "https://registry.npmjs.org/@swc/helpers/-/helpers-0.5.15.tgz",
|
||||
|
@ -3578,6 +3611,13 @@
|
|||
"tailwindcss": "4.1.6"
|
||||
}
|
||||
},
|
||||
"node_modules/@tailwindcss/node/node_modules/tailwindcss": {
|
||||
"version": "4.1.6",
|
||||
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz",
|
||||
"integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@tailwindcss/oxide": {
|
||||
"version": "4.1.6",
|
||||
"resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.6.tgz",
|
||||
|
@ -3838,6 +3878,13 @@
|
|||
"tailwindcss": "4.1.6"
|
||||
}
|
||||
},
|
||||
"node_modules/@tailwindcss/postcss/node_modules/tailwindcss": {
|
||||
"version": "4.1.6",
|
||||
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz",
|
||||
"integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@testing-library/dom": {
|
||||
"version": "10.4.1",
|
||||
"resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz",
|
||||
|
@ -5461,17 +5508,6 @@
|
|||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/busboy": {
|
||||
"version": "1.6.0",
|
||||
"resolved": "https://registry.npmjs.org/busboy/-/busboy-1.6.0.tgz",
|
||||
"integrity": "sha512-8SFQbg/0hQ9xy3UNTB0YEnsNBbWfhf7RtnzpL7TkBiTBRfrQ9Fxcnz7VJsleJpyp6rVLvXiuORqjlHi5q+PYuA==",
|
||||
"dependencies": {
|
||||
"streamsearch": "^1.1.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=10.16.0"
|
||||
}
|
||||
},
|
||||
"node_modules/bytes": {
|
||||
"version": "3.1.2",
|
||||
"resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz",
|
||||
|
@ -8281,9 +8317,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/is-arrayish": {
|
||||
"version": "0.3.2",
|
||||
"resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.2.tgz",
|
||||
"integrity": "sha512-eVRqCvVlZbuw3GrM63ovNSNAeA1K16kaR/LRY/92w0zxQ5/1YzwblUX652i4Xs9RwAGjW9d9y6X88t8OaAJfWQ==",
|
||||
"version": "0.3.4",
|
||||
"resolved": "https://registry.npmjs.org/is-arrayish/-/is-arrayish-0.3.4.tgz",
|
||||
"integrity": "sha512-m6UrgzFVUYawGBh1dUsWR5M2Clqic9RVXC/9f8ceNlv2IcO9j9J/z8UoCLPqtsPBFNzEpfR3xftohbfqDx8EQA==",
|
||||
"license": "MIT",
|
||||
"optional": true
|
||||
},
|
||||
|
@ -11528,14 +11564,13 @@
|
|||
}
|
||||
},
|
||||
"node_modules/next": {
|
||||
"version": "15.3.3",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-15.3.3.tgz",
|
||||
"integrity": "sha512-JqNj29hHNmCLtNvd090SyRbXJiivQ+58XjCcrC50Crb5g5u2zi7Y2YivbsEfzk6AtVI80akdOQbaMZwWB1Hthw==",
|
||||
"version": "15.5.3",
|
||||
"resolved": "https://registry.npmjs.org/next/-/next-15.5.3.tgz",
|
||||
"integrity": "sha512-r/liNAx16SQj4D+XH/oI1dlpv9tdKJ6cONYPwwcCC46f2NjpaRWY+EKCzULfgQYV6YKXjHBchff2IZBSlZmJNw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@next/env": "15.3.3",
|
||||
"@swc/counter": "0.1.3",
|
||||
"@next/env": "15.5.3",
|
||||
"@swc/helpers": "0.5.15",
|
||||
"busboy": "1.6.0",
|
||||
"caniuse-lite": "^1.0.30001579",
|
||||
"postcss": "8.4.31",
|
||||
"styled-jsx": "5.1.6"
|
||||
|
@ -11547,19 +11582,19 @@
|
|||
"node": "^18.18.0 || ^19.8.0 || >= 20.0.0"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@next/swc-darwin-arm64": "15.3.3",
|
||||
"@next/swc-darwin-x64": "15.3.3",
|
||||
"@next/swc-linux-arm64-gnu": "15.3.3",
|
||||
"@next/swc-linux-arm64-musl": "15.3.3",
|
||||
"@next/swc-linux-x64-gnu": "15.3.3",
|
||||
"@next/swc-linux-x64-musl": "15.3.3",
|
||||
"@next/swc-win32-arm64-msvc": "15.3.3",
|
||||
"@next/swc-win32-x64-msvc": "15.3.3",
|
||||
"sharp": "^0.34.1"
|
||||
"@next/swc-darwin-arm64": "15.5.3",
|
||||
"@next/swc-darwin-x64": "15.5.3",
|
||||
"@next/swc-linux-arm64-gnu": "15.5.3",
|
||||
"@next/swc-linux-arm64-musl": "15.5.3",
|
||||
"@next/swc-linux-x64-gnu": "15.5.3",
|
||||
"@next/swc-linux-x64-musl": "15.5.3",
|
||||
"@next/swc-win32-arm64-msvc": "15.5.3",
|
||||
"@next/swc-win32-x64-msvc": "15.5.3",
|
||||
"sharp": "^0.34.3"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@opentelemetry/api": "^1.1.0",
|
||||
"@playwright/test": "^1.41.2",
|
||||
"@playwright/test": "^1.51.1",
|
||||
"babel-plugin-react-compiler": "*",
|
||||
"react": "^18.2.0 || 19.0.0-rc-de68d2f4-20241204 || ^19.0.0",
|
||||
"react-dom": "^18.2.0 || 19.0.0-rc-de68d2f4-20241204 || ^19.0.0",
|
||||
|
@ -13226,16 +13261,16 @@
|
|||
"license": "ISC"
|
||||
},
|
||||
"node_modules/sharp": {
|
||||
"version": "0.34.1",
|
||||
"resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.1.tgz",
|
||||
"integrity": "sha512-1j0w61+eVxu7DawFJtnfYcvSv6qPFvfTaqzTQ2BLknVhHTwGS8sc63ZBF4rzkWMBVKybo4S5OBtDdZahh2A1xg==",
|
||||
"version": "0.34.3",
|
||||
"resolved": "https://registry.npmjs.org/sharp/-/sharp-0.34.3.tgz",
|
||||
"integrity": "sha512-eX2IQ6nFohW4DbvHIOLRB3MHFpYqaqvXd3Tp5e/T/dSH83fxaNJQRvDMhASmkNTsNTVF2/OOopzRCt7xokgPfg==",
|
||||
"hasInstallScript": true,
|
||||
"license": "Apache-2.0",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
"color": "^4.2.3",
|
||||
"detect-libc": "^2.0.3",
|
||||
"semver": "^7.7.1"
|
||||
"detect-libc": "^2.0.4",
|
||||
"semver": "^7.7.2"
|
||||
},
|
||||
"engines": {
|
||||
"node": "^18.17.0 || ^20.3.0 || >=21.0.0"
|
||||
|
@ -13244,26 +13279,28 @@
|
|||
"url": "https://opencollective.com/libvips"
|
||||
},
|
||||
"optionalDependencies": {
|
||||
"@img/sharp-darwin-arm64": "0.34.1",
|
||||
"@img/sharp-darwin-x64": "0.34.1",
|
||||
"@img/sharp-libvips-darwin-arm64": "1.1.0",
|
||||
"@img/sharp-libvips-darwin-x64": "1.1.0",
|
||||
"@img/sharp-libvips-linux-arm": "1.1.0",
|
||||
"@img/sharp-libvips-linux-arm64": "1.1.0",
|
||||
"@img/sharp-libvips-linux-ppc64": "1.1.0",
|
||||
"@img/sharp-libvips-linux-s390x": "1.1.0",
|
||||
"@img/sharp-libvips-linux-x64": "1.1.0",
|
||||
"@img/sharp-libvips-linuxmusl-arm64": "1.1.0",
|
||||
"@img/sharp-libvips-linuxmusl-x64": "1.1.0",
|
||||
"@img/sharp-linux-arm": "0.34.1",
|
||||
"@img/sharp-linux-arm64": "0.34.1",
|
||||
"@img/sharp-linux-s390x": "0.34.1",
|
||||
"@img/sharp-linux-x64": "0.34.1",
|
||||
"@img/sharp-linuxmusl-arm64": "0.34.1",
|
||||
"@img/sharp-linuxmusl-x64": "0.34.1",
|
||||
"@img/sharp-wasm32": "0.34.1",
|
||||
"@img/sharp-win32-ia32": "0.34.1",
|
||||
"@img/sharp-win32-x64": "0.34.1"
|
||||
"@img/sharp-darwin-arm64": "0.34.3",
|
||||
"@img/sharp-darwin-x64": "0.34.3",
|
||||
"@img/sharp-libvips-darwin-arm64": "1.2.0",
|
||||
"@img/sharp-libvips-darwin-x64": "1.2.0",
|
||||
"@img/sharp-libvips-linux-arm": "1.2.0",
|
||||
"@img/sharp-libvips-linux-arm64": "1.2.0",
|
||||
"@img/sharp-libvips-linux-ppc64": "1.2.0",
|
||||
"@img/sharp-libvips-linux-s390x": "1.2.0",
|
||||
"@img/sharp-libvips-linux-x64": "1.2.0",
|
||||
"@img/sharp-libvips-linuxmusl-arm64": "1.2.0",
|
||||
"@img/sharp-libvips-linuxmusl-x64": "1.2.0",
|
||||
"@img/sharp-linux-arm": "0.34.3",
|
||||
"@img/sharp-linux-arm64": "0.34.3",
|
||||
"@img/sharp-linux-ppc64": "0.34.3",
|
||||
"@img/sharp-linux-s390x": "0.34.3",
|
||||
"@img/sharp-linux-x64": "0.34.3",
|
||||
"@img/sharp-linuxmusl-arm64": "0.34.3",
|
||||
"@img/sharp-linuxmusl-x64": "0.34.3",
|
||||
"@img/sharp-wasm32": "0.34.3",
|
||||
"@img/sharp-win32-arm64": "0.34.3",
|
||||
"@img/sharp-win32-ia32": "0.34.3",
|
||||
"@img/sharp-win32-x64": "0.34.3"
|
||||
}
|
||||
},
|
||||
"node_modules/shebang-command": {
|
||||
|
@ -13389,9 +13426,9 @@
|
|||
"license": "ISC"
|
||||
},
|
||||
"node_modules/simple-swizzle": {
|
||||
"version": "0.2.2",
|
||||
"resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz",
|
||||
"integrity": "sha512-JA//kQgZtbuY83m+xT+tXJkmJncGMTFT+C+g2h2R9uxkYIrE2yy9sgmcLhCnw57/WSD+Eh3J97FPEDFnbXnDUg==",
|
||||
"version": "0.2.4",
|
||||
"resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.4.tgz",
|
||||
"integrity": "sha512-nAu1WFPQSMNr2Zn9PGSZK9AGn4t/y97lEm+MXTtUDwfP0ksAIX4nO+6ruD9Jwut4C49SB1Ws+fbXsm/yScWOHw==",
|
||||
"license": "MIT",
|
||||
"optional": true,
|
||||
"dependencies": {
|
||||
|
@ -13512,14 +13549,6 @@
|
|||
"node": ">= 0.8"
|
||||
}
|
||||
},
|
||||
"node_modules/streamsearch": {
|
||||
"version": "1.1.0",
|
||||
"resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-1.1.0.tgz",
|
||||
"integrity": "sha512-Mcc5wHehp9aXz1ax6bZUyY5afg9u2rv5cqQI3mRrYkGC8rW2hM02jWuwjtL++LS5qinSyhj2QfLyNsuc+VsExg==",
|
||||
"engines": {
|
||||
"node": ">=10.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/string-length": {
|
||||
"version": "4.0.2",
|
||||
"resolved": "https://registry.npmjs.org/string-length/-/string-length-4.0.2.tgz",
|
||||
|
@ -13843,9 +13872,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/tailwindcss": {
|
||||
"version": "4.1.6",
|
||||
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz",
|
||||
"integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==",
|
||||
"version": "4.1.13",
|
||||
"resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.13.tgz",
|
||||
"integrity": "sha512-i+zidfmTqtwquj4hMEwdjshYYgMbOrPzb9a0M3ZgNa0JMoZeFC6bxZvO8yr8ozS6ix2SDz0+mvryPeBs2TFE+w==",
|
||||
"dev": true,
|
||||
"license": "MIT"
|
||||
},
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
"@radix-ui/react-collapsible": "^1.1.12",
|
||||
"@radix-ui/react-dialog": "^1.1.13",
|
||||
"@radix-ui/react-dropdown-menu": "^2.1.16",
|
||||
"@radix-ui/react-select": "^2.2.5",
|
||||
"@radix-ui/react-select": "^2.2.6",
|
||||
"@radix-ui/react-separator": "^1.1.7",
|
||||
"@radix-ui/react-slot": "^1.2.3",
|
||||
"@radix-ui/react-tooltip": "^1.2.8",
|
||||
|
@ -25,7 +25,7 @@
|
|||
"framer-motion": "^12.23.12",
|
||||
"llama-stack-client": "^0.2.21",
|
||||
"lucide-react": "^0.542.0",
|
||||
"next": "15.3.3",
|
||||
"next": "15.5.3",
|
||||
"next-auth": "^4.24.11",
|
||||
"next-themes": "^0.4.6",
|
||||
"react": "^19.0.0",
|
||||
|
|
|
@@ -32,7 +32,7 @@ dependencies = [
    "jinja2>=3.1.6",
    "jsonschema",
    "llama-stack-client>=0.2.21",
-   "openai>=1.99.6",
+   "openai>=1.100.0", # for expires_after support
    "prompt-toolkit",
    "python-dotenv",
    "python-jose[cryptography]",

@@ -80,7 +80,6 @@ dev = [
unit = [
    "sqlite-vec",
    "ollama",
    "openai",
    "aiosqlite",
    "aiohttp",
    "psycopg2-binary>=2.9.0",

@@ -105,7 +104,6 @@ unit = [
    # separately. If you are using "uv" to execute your tests, you can use the "--group" flag to specify extra
    # dependencies.
test = [
-   "openai>=1.100.0", # for expires_after support
    "aiosqlite",
    "aiohttp",
    "torch>=2.6.0",

@@ -356,6 +354,7 @@ warn_required_dynamic_aliases = true
classmethod-decorators = ["classmethod", "pydantic.field_validator"]

[tool.pytest.ini_options]
addopts = ["--durations=10"]
asyncio_mode = "auto"
markers = [
    "allow_network: Allow network access for specific unit tests",
|
@@ -239,8 +239,9 @@ echo "Test pattern: ${TEST_PATTERN:-"(none)"}"
echo ""

# Prepare inputs for gh workflow run
+INPUTS=
if [[ -n "$TEST_SUBDIRS" ]]; then
-  INPUTS="-f subdirs='$TEST_SUBDIRS'"
+  INPUTS="$INPUTS -f subdirs='$TEST_SUBDIRS'"
fi
if [[ -n "$TEST_SETUP" ]]; then
  INPUTS="$INPUTS -f test-setup='$TEST_SETUP'"
|
@@ -6,12 +6,25 @@

import time
+import unicodedata

import pytest

from ..test_cases.test_case import TestCase


+def _normalize_text(text: str) -> str:
+    """
+    Normalize Unicode text by removing diacritical marks for comparison.
+
+    The test case streaming_01 expects the answer "Sol" for the question "What's the name of the Sun
+    in latin?", but the model is returning "sōl" (with a macron over the 'o'), which is the correct
+    Latin spelling. The test is failing because it's doing a simple case-insensitive string search
+    for "sol" but the actual response contains the diacritical mark.
+    """
+    return unicodedata.normalize("NFD", text).encode("ascii", "ignore").decode("ascii").lower()
+
+
def provider_from_model(client_with_models, model_id):
    models = {m.identifier: m for m in client_with_models.models.list()}
    models.update({m.provider_resource_id: m for m in client_with_models.models.list()})
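The helper is NFD decomposition followed by an ASCII re-encode, which drops combining marks so comparisons become accent-insensitive. A quick standalone check on the "sōl" case the docstring describes:

    import unicodedata

    def normalize_text(text: str) -> str:
        return unicodedata.normalize("NFD", text).encode("ascii", "ignore").decode("ascii").lower()

    assert normalize_text("s\u014dl") == "sol"  # "sōl" with a macron over the 'o'
    assert "sol" in normalize_text("The Sun is S\u014dl in Latin.")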
@@ -42,6 +55,10 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
        "remote::groq",
        "remote::gemini",  # https://generativelanguage.googleapis.com/v1beta/openai/completions -> 404
        "remote::anthropic",  # at least claude-3-{5,7}-{haiku,sonnet}-* / claude-{sonnet,opus}-4-* are not supported
+       "remote::azure",  # {'error': {'code': 'OperationNotSupported', 'message': 'The completion operation
+       # does not work with the specified model, gpt-5-mini. Please choose different model and try
+       # again. You can learn more about which models can be used with each operation here:
+       # https://go.microsoft.com/fwlink/?linkid=2197993.'}}"}
    ):
        pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")
@@ -157,7 +174,8 @@ def test_openai_completion_non_streaming_suffix(llama_stack_client, client_with_
    assert len(response.choices) > 0
    choice = response.choices[0]
    assert len(choice.text) > 5
-   assert "france" in choice.text.lower()
+   normalized_text = _normalize_text(choice.text)
+   assert "france" in normalized_text


@pytest.mark.parametrize(
@@ -248,7 +266,9 @@ def test_openai_chat_completion_non_streaming(compat_client, client_with_models,
    )
    message_content = response.choices[0].message.content.lower().strip()
    assert len(message_content) > 0
-   assert expected.lower() in message_content
+   normalized_expected = _normalize_text(expected)
+   normalized_content = _normalize_text(message_content)
+   assert normalized_expected in normalized_content


@pytest.mark.parametrize(
@@ -272,10 +292,13 @@ def test_openai_chat_completion_streaming(compat_client, client_with_models, tex
    )
    streamed_content = []
    for chunk in response:
-       if chunk.choices[0].delta.content:
+       # On some providers like Azure, the choices are empty on the first chunk, so we need to check for that
+       if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta.content:
            streamed_content.append(chunk.choices[0].delta.content.lower().strip())
    assert len(streamed_content) > 0
-   assert expected.lower() in "".join(streamed_content)
+   normalized_expected = _normalize_text(expected)
+   normalized_content = _normalize_text("".join(streamed_content))
+   assert normalized_expected in normalized_content


@pytest.mark.parametrize(
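The extra chunk.choices guard matters because some backends (the comment calls out Azure) emit a leading chunk whose choices list is empty, carrying only prompt filter results, so indexing choices[0] unconditionally raises IndexError. A standalone sketch of the defensive accumulation loop over such a stream (plain dicts standing in for SDK chunk objects):

    chunks = [
        {"choices": []},                              # e.g. Azure's leading filter-results chunk
        {"choices": [{"delta": {"content": "Hello"}}]},
        {"choices": [{"delta": {"content": None}}]},  # keep-alive chunk with no text
        {"choices": [{"delta": {"content": " world"}}]},
    ]

    pieces = []
    for chunk in chunks:
        if chunk["choices"] and chunk["choices"][0]["delta"].get("content"):
            pieces.append(chunk["choices"][0]["delta"]["content"])

    assert "".join(pieces) == "Hello world"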
@@ -308,8 +331,12 @@ def test_openai_chat_completion_streaming_with_n(compat_client, client_with_mode
                streamed_content.get(choice.index, "") + choice.delta.content.lower().strip()
            )
    assert len(streamed_content) == 2
+   normalized_expected = _normalize_text(expected)
    for i, content in streamed_content.items():
-       assert expected.lower() in content, f"Choice {i}: Expected {expected.lower()} in {content}"
+       normalized_content = _normalize_text(content)
+       assert normalized_expected in normalized_content, (
+           f"Choice {i}: Expected {normalized_expected} in {normalized_content}"
+       )


@pytest.mark.parametrize(
@@ -339,9 +366,9 @@ def test_inference_store(compat_client, client_with_models, text_model_id, strea
        content = ""
        response_id = None
        for chunk in response:
-           if response_id is None:
+           if response_id is None and chunk.id:
                response_id = chunk.id
-           if chunk.choices[0].delta.content:
+           if chunk.choices and len(chunk.choices) > 0 and chunk.choices[0].delta.content:
                content += chunk.choices[0].delta.content
    else:
        response_id = response.id
@@ -410,11 +437,12 @@ def test_inference_store_tool_calls(compat_client, client_with_models, text_mode
        content = ""
        response_id = None
        for chunk in response:
-           if response_id is None:
+           if response_id is None and chunk.id:
                response_id = chunk.id
-           if delta := chunk.choices[0].delta:
-               if delta.content:
-                   content += delta.content
+           if chunk.choices and len(chunk.choices) > 0:
+               if delta := chunk.choices[0].delta:
+                   if delta.content:
+                       content += delta.content
    else:
        response_id = response.id
        content = response.choices[0].message.content
@@ -484,4 +512,5 @@ def test_openai_chat_completion_non_streaming_with_file(openai_client, client_wi
        stream=False,
    )
    message_content = response.choices[0].message.content.lower().strip()
-   assert "hello world" in message_content
+   normalized_content = _normalize_text(message_content)
+   assert "hello world" in normalized_content
|
@@ -32,6 +32,7 @@ def skip_if_model_doesnt_support_completion(client_with_models, model_id):
            "remote::vertexai",
            "remote::groq",
            "remote::sambanova",
+           "remote::azure",
        )
        or "openai-compat" in provider.provider_type
    ):

@@ -44,7 +45,7 @@ def skip_if_model_doesnt_support_json_schema_structured_output(client_with_model
    provider_id = models[model_id].provider_id
    providers = {p.provider_id: p for p in client_with_models.providers.list()}
    provider = providers[provider_id]
-   if provider.provider_type in ("remote::sambanova",):
+   if provider.provider_type in ("remote::sambanova", "remote::azure"):
        pytest.skip(
            f"Model {model_id} hosted by {provider.provider_type} doesn't support json_schema structured output"
        )
|
71 tests/integration/recordings/responses/0fda25b9241c.json (Normal file)
@ -0,0 +1,71 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "gpt-5-mini",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Which planet do humans live on?"
|
||||
}
|
||||
],
|
||||
"stream": false
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "gpt-5-mini"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIXqfvjuluKkZtG3q2QJoSQhBU0",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "Humans live on Earth \u2014 the third planet from the Sun. It's the only known planet that naturally supports life, with a breathable atmosphere, liquid water, and temperatures suitable for living organisms.",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": [],
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499901,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": {
|
||||
"completion_tokens": 112,
|
||||
"prompt_tokens": 13,
|
||||
"total_tokens": 125,
|
||||
"completion_tokens_details": {
|
||||
"accepted_prediction_tokens": 0,
|
||||
"audio_tokens": 0,
|
||||
"reasoning_tokens": 64,
|
||||
"rejected_prediction_tokens": 0
|
||||
},
|
||||
"prompt_tokens_details": {
|
||||
"audio_tokens": 0,
|
||||
"cached_tokens": 0
|
||||
}
|
||||
},
|
||||
"prompt_filter_results": [
|
||||
{
|
||||
"prompt_index": 0,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
448 tests/integration/recordings/responses/2b2ad549510d.json (Normal file)
@ -0,0 +1,448 @@
|
|||
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "gpt-5-mini",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hello, world!"
|
||||
}
|
||||
],
|
||||
"stream": true
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "gpt-5-mini"
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "",
|
||||
"choices": [],
|
||||
"created": 0,
|
||||
"model": "",
|
||||
"object": "",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null,
|
||||
"prompt_filter_results": [
|
||||
{
|
||||
"prompt_index": 0,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499910,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "Hello",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499910,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": ",",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499910,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " world",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499910,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "!",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499910,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " Hi",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499910,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " \u2014",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499910,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " how",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499910,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " can",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499910,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " I",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499910,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " help",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499910,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " you",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499910,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " today",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499910,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "?",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499910,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIgeXOClAuSm8xHAS6CYQ87lB8O",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499910,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
}
|
||||
}
|
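The streaming variant above delivers the assistant reply as per-token content deltas; the complete message is recovered by concatenating choices[0].delta.content across chunks. A small sketch of that accumulation, assuming chunk objects shaped like openai-python's ChatCompletionChunk:

def join_streamed_content(chunks) -> str:
    # Concatenate every non-empty content delta into the final assistant message.
    parts = []
    for chunk in chunks:
        for choice in chunk.choices:
            if choice.delta is not None and choice.delta.content:
                parts.append(choice.delta.content)
    return "".join(parts)

For the recording above, this reassembles the short greeting the model returned from its individual chunks.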
tests/integration/recordings/responses/57b67d1b1a36.json (new file, 71 lines)
@@ -0,0 +1,71 @@
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "gpt-5-mini",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Which planet has rings around it with a name starting with letter S?"
|
||||
}
|
||||
],
|
||||
"stream": false
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "gpt-5-mini"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIkT5cbqFazpungtewksVePcUNa",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "Saturn. It's the planet famous for its prominent ring system made of ice and rock.",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": [],
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499914,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": {
|
||||
"completion_tokens": 156,
|
||||
"prompt_tokens": 20,
|
||||
"total_tokens": 176,
|
||||
"completion_tokens_details": {
|
||||
"accepted_prediction_tokens": 0,
|
||||
"audio_tokens": 0,
|
||||
"reasoning_tokens": 128,
|
||||
"rejected_prediction_tokens": 0
|
||||
},
|
||||
"prompt_tokens_details": {
|
||||
"audio_tokens": 0,
|
||||
"cached_tokens": 0
|
||||
}
|
||||
},
|
||||
"prompt_filter_results": [
|
||||
{
|
||||
"prompt_index": 0,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
tests/integration/recordings/responses/8752115f8d0c.json (new file, 71 lines)
@@ -0,0 +1,71 @@
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "gpt-5-mini",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Hello, world!"
|
||||
}
|
||||
],
|
||||
"stream": false
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "gpt-5-mini"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIuyylsMNXspa83k8LrD8SQadNY",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": "Hello! \ud83d\udc4b How can I help you today \u2014 answer a question, write or edit something, debug code, brainstorm ideas, or anything else?",
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": [],
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499924,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": {
|
||||
"completion_tokens": 40,
|
||||
"prompt_tokens": 10,
|
||||
"total_tokens": 50,
|
||||
"completion_tokens_details": {
|
||||
"accepted_prediction_tokens": 0,
|
||||
"audio_tokens": 0,
|
||||
"reasoning_tokens": 0,
|
||||
"rejected_prediction_tokens": 0
|
||||
},
|
||||
"prompt_tokens_details": {
|
||||
"audio_tokens": 0,
|
||||
"cached_tokens": 0
|
||||
}
|
||||
},
|
||||
"prompt_filter_results": [
|
||||
{
|
||||
"prompt_index": 0,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
tests/integration/recordings/responses/94d11daee205.json (1178 lines; diff suppressed because it is too large)
tests/integration/recordings/responses/9f3d749cc1c8.json (1150 lines; diff suppressed because it is too large)
tests/integration/recordings/responses/c791119e6359.json (new file, 98 lines)
@@ -0,0 +1,98 @@
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "gpt-5-mini",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
|
||||
}
|
||||
],
|
||||
"stream": false,
|
||||
"tools": [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"description": "Get the weather in a given city",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"city": {
|
||||
"type": "string",
|
||||
"description": "The city to get the weather for"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "gpt-5-mini"
|
||||
},
|
||||
"response": {
|
||||
"body": {
|
||||
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIwq9Odd0mOJMmw7ytv8iEazH4H",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "tool_calls",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"message": {
|
||||
"content": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"annotations": [],
|
||||
"audio": null,
|
||||
"function_call": null,
|
||||
"tool_calls": [
|
||||
{
|
||||
"id": "call_yw18spRc1jjUlEyabbXBhB33",
|
||||
"function": {
|
||||
"arguments": "{\"city\":\"Tokyo\"}",
|
||||
"name": "get_weather"
|
||||
},
|
||||
"type": "function"
|
||||
}
|
||||
]
|
||||
},
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499926,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": {
|
||||
"completion_tokens": 88,
|
||||
"prompt_tokens": 151,
|
||||
"total_tokens": 239,
|
||||
"completion_tokens_details": {
|
||||
"accepted_prediction_tokens": 0,
|
||||
"audio_tokens": 0,
|
||||
"reasoning_tokens": 64,
|
||||
"rejected_prediction_tokens": 0
|
||||
},
|
||||
"prompt_tokens_details": {
|
||||
"audio_tokens": 0,
|
||||
"cached_tokens": 0
|
||||
}
|
||||
},
|
||||
"prompt_filter_results": [
|
||||
{
|
||||
"prompt_index": 0,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"is_streaming": false
|
||||
}
|
||||
}
|
tests/integration/recordings/responses/d3e27b7234e2.json (2150 lines; diff suppressed because it is too large)
tests/integration/recordings/responses/fb785db7fafd.json (new file, 310 lines)
@@ -0,0 +1,310 @@
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "gpt-5-mini",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
|
||||
}
|
||||
],
|
||||
"stream": true,
|
||||
"tools": [
|
||||
{
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": "get_weather",
|
||||
"description": "Get the weather in a given city",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"city": {
|
||||
"type": "string",
|
||||
"description": "The city to get the weather for"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "gpt-5-mini"
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "",
|
||||
"choices": [],
|
||||
"created": 0,
|
||||
"model": "",
|
||||
"object": "",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null,
|
||||
"prompt_filter_results": [
|
||||
{
|
||||
"prompt_index": 0,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": [
|
||||
{
|
||||
"index": 0,
|
||||
"id": "call_TMbEoYn9q0ZKtoxav5LpD9Ts",
|
||||
"function": {
|
||||
"arguments": "",
|
||||
"name": "get_weather"
|
||||
},
|
||||
"type": "function"
|
||||
}
|
||||
]
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499912,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": [
|
||||
{
|
||||
"index": 0,
|
||||
"id": null,
|
||||
"function": {
|
||||
"arguments": "{\"",
|
||||
"name": null
|
||||
},
|
||||
"type": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499912,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": [
|
||||
{
|
||||
"index": 0,
|
||||
"id": null,
|
||||
"function": {
|
||||
"arguments": "city",
|
||||
"name": null
|
||||
},
|
||||
"type": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499912,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": [
|
||||
{
|
||||
"index": 0,
|
||||
"id": null,
|
||||
"function": {
|
||||
"arguments": "\":\"",
|
||||
"name": null
|
||||
},
|
||||
"type": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499912,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": [
|
||||
{
|
||||
"index": 0,
|
||||
"id": null,
|
||||
"function": {
|
||||
"arguments": "Tokyo",
|
||||
"name": null
|
||||
},
|
||||
"type": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499912,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": [
|
||||
{
|
||||
"index": 0,
|
||||
"id": null,
|
||||
"function": {
|
||||
"arguments": "\"}",
|
||||
"name": null
|
||||
},
|
||||
"type": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499912,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECIiMMWyfACuKUYWEyYSazcnvRVo",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": "tool_calls",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499912,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
}
|
||||
}
|
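In the streaming tool-call recording above, the function arguments arrive as JSON fragments ('{"', 'city', '":"', 'Tokyo', '"}') spread over several chunks and keyed by the tool call's index. A sketch of the usual accumulation step, again assuming openai-python ChatCompletionChunk objects:

from collections import defaultdict

def accumulate_tool_call_arguments(chunks) -> dict[int, str]:
    # Stitch each tool call's streamed argument fragments back into one JSON string.
    arguments: dict[int, str] = defaultdict(str)
    for chunk in chunks:
        for choice in chunk.choices:
            for tool_call in (choice.delta.tool_calls or []):
                if tool_call.function and tool_call.function.arguments:
                    arguments[tool_call.index] += tool_call.function.arguments
    return dict(arguments)

For the chunks above this yields {0: '{"city":"Tokyo"}'}, which json.loads can then turn into the final call payload.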
tests/integration/recordings/responses/ff3271401fb4.json (new file, 556 lines)
@@ -0,0 +1,556 @@
{
|
||||
"request": {
|
||||
"method": "POST",
|
||||
"url": "https://shan-mfbb618r-eastus2.cognitiveservices.azure.com/openai/v1/v1/chat/completions",
|
||||
"headers": {},
|
||||
"body": {
|
||||
"model": "gpt-5-mini",
|
||||
"messages": [
|
||||
{
|
||||
"role": "user",
|
||||
"content": "What is the name of the US captial?"
|
||||
}
|
||||
],
|
||||
"stream": true
|
||||
},
|
||||
"endpoint": "/v1/chat/completions",
|
||||
"model": "gpt-5-mini"
|
||||
},
|
||||
"response": {
|
||||
"body": [
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "",
|
||||
"choices": [],
|
||||
"created": 0,
|
||||
"model": "",
|
||||
"object": "",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null,
|
||||
"prompt_filter_results": [
|
||||
{
|
||||
"prompt_index": 0,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": "assistant",
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499916,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "The",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499916,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " capital",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499916,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " of",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499916,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " the",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499916,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " United",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499916,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " States",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499916,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " is",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499916,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " Washington",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499916,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": ",",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499916,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " D",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499916,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": ".C",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499916,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": ".",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499916,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " (",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499916,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": "District",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499916,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " of",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499916,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": " Columbia",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499916,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": ").",
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499916,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "chatcmpl-CECImr5TLfMFiZN3FUlfVdBLr51Fs",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"logprobs": null,
|
||||
"content_filter_results": {}
|
||||
}
|
||||
],
|
||||
"created": 1757499916,
|
||||
"model": "gpt-5-mini-2025-08-07",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": null,
|
||||
"system_fingerprint": null,
|
||||
"usage": null
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
}
|
||||
}
|
|
@@ -49,16 +49,13 @@ def setup_openai_telemetry_data(llama_stack_client, text_model_id):
|
|||
traces = llama_stack_client.telemetry.query_traces(limit=10)
|
||||
if len(traces) >= 5: # 5 OpenAI completion traces
|
||||
break
|
||||
time.sleep(1)
|
||||
time.sleep(0.1)
|
||||
|
||||
if len(traces) < 5:
|
||||
pytest.fail(
|
||||
f"Failed to create sufficient OpenAI completion telemetry data after 30s. Got {len(traces)} traces."
|
||||
)
|
||||
|
||||
# Wait for 5 seconds to ensure traces has completed logging
|
||||
time.sleep(5)
|
||||
|
||||
yield
|
||||
|
||||
|
||||
|
@@ -185,11 +182,13 @@ def test_openai_completion_creates_telemetry(llama_stack_client, text_model_id):
|
|||
assert len(response.choices) > 0, "Response should have at least one choice"
|
||||
|
||||
# Wait for telemetry to be recorded
|
||||
time.sleep(3)
|
||||
|
||||
# Check that we have more traces now
|
||||
final_traces = llama_stack_client.telemetry.query_traces(limit=20)
|
||||
final_count = len(final_traces)
|
||||
start_time = time.time()
|
||||
while time.time() - start_time < 30:
|
||||
final_traces = llama_stack_client.telemetry.query_traces(limit=20)
|
||||
final_count = len(final_traces)
|
||||
if final_count > initial_count:
|
||||
break
|
||||
time.sleep(0.1)
|
||||
|
||||
# Should have at least as many traces as before (might have more due to other activity)
|
||||
assert final_count >= initial_count, "Should have at least as many traces after OpenAI call"
|
||||
|
|
|
@@ -42,14 +42,11 @@ def setup_telemetry_data(llama_stack_client, text_model_id):
|
|||
traces = llama_stack_client.telemetry.query_traces(limit=10)
|
||||
if len(traces) >= 4:
|
||||
break
|
||||
time.sleep(1)
|
||||
time.sleep(0.1)
|
||||
|
||||
if len(traces) < 4:
|
||||
pytest.fail(f"Failed to create sufficient telemetry data after 30s. Got {len(traces)} traces.")
|
||||
|
||||
# Wait for 5 seconds to ensure traces has completed logging
|
||||
time.sleep(5)
|
||||
|
||||
yield
|
||||
|
||||
|
||||
|
|
|
@@ -46,10 +46,7 @@ def setup_telemetry_metrics_data(openai_client, client_with_models, text_model_i
|
|||
break
|
||||
except Exception:
|
||||
pass
|
||||
time.sleep(1)
|
||||
|
||||
# Wait additional time to ensure all metrics are processed
|
||||
time.sleep(5)
|
||||
time.sleep(0.1)
|
||||
|
||||
# Return the token lists for use in tests
|
||||
return {"prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens, "total_tokens": total_tokens}
|
||||
|
|
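The telemetry test changes above replace fixed one-second and five-second sleeps with tight polling loops that re-check the traces every 0.1 seconds until a deadline. Factored out, the pattern looks like the helper below (a sketch only; the fixtures inline the loop directly rather than using a shared utility):

import time

def wait_until(condition, timeout: float = 30.0, interval: float = 0.1):
    # Re-evaluate `condition` until it returns a truthy value or the deadline passes.
    deadline = time.time() + timeout
    while time.time() < deadline:
        result = condition()
        if result:
            return result
        time.sleep(interval)
    return None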
|
@@ -183,6 +183,110 @@ def test_vector_db_insert_from_url_and_query(
|
|||
assert any("llama2" in chunk.content.lower() for chunk in response2.chunks)
|
||||
|
||||
|
||||
def test_rag_tool_openai_apis(client_with_empty_registry, embedding_model_id, embedding_dimension):
|
||||
vector_db_id = "test_openai_vector_db"
|
||||
|
||||
client_with_empty_registry.vector_dbs.register(
|
||||
vector_db_id=vector_db_id,
|
||||
embedding_model=embedding_model_id,
|
||||
embedding_dimension=embedding_dimension,
|
||||
)
|
||||
|
||||
available_vector_dbs = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
|
||||
actual_vector_db_id = available_vector_dbs[0]
|
||||
|
||||
# different document formats that should work with OpenAI APIs
|
||||
documents = [
|
||||
Document(
|
||||
document_id="text-doc",
|
||||
content="This is a plain text document about machine learning algorithms.",
|
||||
metadata={"type": "text", "category": "AI"},
|
||||
),
|
||||
Document(
|
||||
document_id="url-doc",
|
||||
content="https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/chat.rst",
|
||||
mime_type="text/plain",
|
||||
metadata={"type": "url", "source": "pytorch"},
|
||||
),
|
||||
Document(
|
||||
document_id="data-url-doc",
|
||||
content="data:text/plain;base64,VGhpcyBpcyBhIGRhdGEgVVJMIGRvY3VtZW50IGFib3V0IGRlZXAgbGVhcm5pbmcu", # "This is a data URL document about deep learning."
|
||||
metadata={"type": "data_url", "encoding": "base64"},
|
||||
),
|
||||
]
|
||||
|
||||
client_with_empty_registry.tool_runtime.rag_tool.insert(
|
||||
documents=documents,
|
||||
vector_db_id=actual_vector_db_id,
|
||||
chunk_size_in_tokens=256,
|
||||
)
|
||||
|
||||
files_list = client_with_empty_registry.files.list()
|
||||
assert len(files_list.data) >= len(documents), (
|
||||
f"Expected at least {len(documents)} files, got {len(files_list.data)}"
|
||||
)
|
||||
|
||||
vector_store_files = client_with_empty_registry.vector_io.openai_list_files_in_vector_store(
|
||||
vector_store_id=actual_vector_db_id
|
||||
)
|
||||
assert len(vector_store_files.data) >= len(documents), f"Expected at least {len(documents)} files in vector store"
|
||||
|
||||
response = client_with_empty_registry.tool_runtime.rag_tool.query(
|
||||
vector_db_ids=[actual_vector_db_id],
|
||||
content="Tell me about machine learning and deep learning",
|
||||
)
|
||||
|
||||
assert_valid_text_response(response)
|
||||
content_text = " ".join([chunk.text for chunk in response.content]).lower()
|
||||
assert "machine learning" in content_text or "deep learning" in content_text
|
||||
|
||||
|
||||
def test_rag_tool_exception_handling(client_with_empty_registry, embedding_model_id, embedding_dimension):
|
||||
vector_db_id = "test_exception_handling"
|
||||
|
||||
client_with_empty_registry.vector_dbs.register(
|
||||
vector_db_id=vector_db_id,
|
||||
embedding_model=embedding_model_id,
|
||||
embedding_dimension=embedding_dimension,
|
||||
)
|
||||
|
||||
available_vector_dbs = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
|
||||
actual_vector_db_id = available_vector_dbs[0]
|
||||
|
||||
documents = [
|
||||
Document(
|
||||
document_id="valid-doc",
|
||||
content="This is a valid document that should be processed successfully.",
|
||||
metadata={"status": "valid"},
|
||||
),
|
||||
Document(
|
||||
document_id="invalid-url-doc",
|
||||
content="https://nonexistent-domain-12345.com/invalid.txt",
|
||||
metadata={"status": "invalid_url"},
|
||||
),
|
||||
Document(
|
||||
document_id="another-valid-doc",
|
||||
content="This is another valid document for testing resilience.",
|
||||
metadata={"status": "valid"},
|
||||
),
|
||||
]
|
||||
|
||||
client_with_empty_registry.tool_runtime.rag_tool.insert(
|
||||
documents=documents,
|
||||
vector_db_id=actual_vector_db_id,
|
||||
chunk_size_in_tokens=256,
|
||||
)
|
||||
|
||||
response = client_with_empty_registry.tool_runtime.rag_tool.query(
|
||||
vector_db_ids=[actual_vector_db_id],
|
||||
content="valid document",
|
||||
)
|
||||
|
||||
assert_valid_text_response(response)
|
||||
content_text = " ".join([chunk.text for chunk in response.content]).lower()
|
||||
assert "valid document" in content_text
|
||||
|
||||
|
||||
def test_rag_tool_insert_and_query(client_with_empty_registry, embedding_model_id, embedding_dimension):
|
||||
providers = [p for p in client_with_empty_registry.providers.list() if p.api == "vector_io"]
|
||||
assert len(providers) > 0
|
||||
|
@@ -249,3 +353,107 @@ def test_rag_tool_insert_and_query(client_with_empty_registry, embedding_model_i
|
|||
"chunk_template": "This should raise a ValueError because it is missing the proper template variables",
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def test_rag_tool_query_generation(client_with_empty_registry, embedding_model_id, embedding_dimension):
|
||||
vector_db_id = "test_query_generation_db"
|
||||
|
||||
client_with_empty_registry.vector_dbs.register(
|
||||
vector_db_id=vector_db_id,
|
||||
embedding_model=embedding_model_id,
|
||||
embedding_dimension=embedding_dimension,
|
||||
)
|
||||
|
||||
available_vector_dbs = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
|
||||
actual_vector_db_id = available_vector_dbs[0]
|
||||
|
||||
documents = [
|
||||
Document(
|
||||
document_id="ai-doc",
|
||||
content="Artificial intelligence and machine learning are transforming technology.",
|
||||
metadata={"category": "AI"},
|
||||
),
|
||||
Document(
|
||||
document_id="banana-doc",
|
||||
content="Don't bring a banana to a knife fight.",
|
||||
metadata={"category": "wisdom"},
|
||||
),
|
||||
]
|
||||
|
||||
client_with_empty_registry.tool_runtime.rag_tool.insert(
|
||||
documents=documents,
|
||||
vector_db_id=actual_vector_db_id,
|
||||
chunk_size_in_tokens=256,
|
||||
)
|
||||
|
||||
response = client_with_empty_registry.tool_runtime.rag_tool.query(
|
||||
vector_db_ids=[actual_vector_db_id],
|
||||
content="Tell me about AI",
|
||||
)
|
||||
|
||||
assert_valid_text_response(response)
|
||||
content_text = " ".join([chunk.text for chunk in response.content]).lower()
|
||||
assert "artificial intelligence" in content_text or "machine learning" in content_text
|
||||
|
||||
|
||||
def test_rag_tool_pdf_data_url_handling(client_with_empty_registry, embedding_model_id, embedding_dimension):
|
||||
vector_db_id = "test_pdf_data_url_db"
|
||||
|
||||
client_with_empty_registry.vector_dbs.register(
|
||||
vector_db_id=vector_db_id,
|
||||
embedding_model=embedding_model_id,
|
||||
embedding_dimension=embedding_dimension,
|
||||
)
|
||||
|
||||
available_vector_dbs = [vector_db.identifier for vector_db in client_with_empty_registry.vector_dbs.list()]
|
||||
actual_vector_db_id = available_vector_dbs[0]
|
||||
|
||||
sample_pdf = b"%PDF-1.3\n3 0 obj\n<</Type /Page\n/Parent 1 0 R\n/Resources 2 0 R\n/Contents 4 0 R>>\nendobj\n4 0 obj\n<</Filter /FlateDecode /Length 115>>\nstream\nx\x9c\x15\xcc1\x0e\x820\x18@\xe1\x9dS\xbcM]jk$\xd5\xd5(\x83!\x86\xa1\x17\xf8\xa3\xa5`LIh+\xd7W\xc6\xf7\r\xef\xc0\xbd\xd2\xaa\xb6,\xd5\xc5\xb1o\x0c\xa6VZ\xe3znn%\xf3o\xab\xb1\xe7\xa3:Y\xdc\x8bm\xeb\xf3&1\xc8\xd7\xd3\x97\xc82\xe6\x81\x87\xe42\xcb\x87Vb(\x12<\xdd<=}Jc\x0cL\x91\xee\xda$\xb5\xc3\xbd\xd7\xe9\x0f\x8d\x97 $\nendstream\nendobj\n1 0 obj\n<</Type /Pages\n/Kids [3 0 R ]\n/Count 1\n/MediaBox [0 0 595.28 841.89]\n>>\nendobj\n5 0 obj\n<</Type /Font\n/BaseFont /Helvetica\n/Subtype /Type1\n/Encoding /WinAnsiEncoding\n>>\nendobj\n2 0 obj\n<<\n/ProcSet [/PDF /Text /ImageB /ImageC /ImageI]\n/Font <<\n/F1 5 0 R\n>>\n/XObject <<\n>>\n>>\nendobj\n6 0 obj\n<<\n/Producer (PyFPDF 1.7.2 http://pyfpdf.googlecode.com/)\n/Title (This is a sample title.)\n/Author (Llama Stack Developers)\n/CreationDate (D:20250312165548)\n>>\nendobj\n7 0 obj\n<<\n/Type /Catalog\n/Pages 1 0 R\n/OpenAction [3 0 R /FitH null]\n/PageLayout /OneColumn\n>>\nendobj\nxref\n0 8\n0000000000 65535 f \n0000000272 00000 n \n0000000455 00000 n \n0000000009 00000 n \n0000000087 00000 n \n0000000359 00000 n \n0000000559 00000 n \n0000000734 00000 n \ntrailer\n<<\n/Size 8\n/Root 7 0 R\n/Info 6 0 R\n>>\nstartxref\n837\n%%EOF\n"
|
||||
|
||||
import base64
|
||||
|
||||
pdf_base64 = base64.b64encode(sample_pdf).decode("utf-8")
|
||||
pdf_data_url = f"data:application/pdf;base64,{pdf_base64}"
|
||||
|
||||
documents = [
|
||||
Document(
|
||||
document_id="test-pdf-data-url",
|
||||
content=pdf_data_url,
|
||||
metadata={"type": "pdf", "source": "data_url"},
|
||||
),
|
||||
]
|
||||
|
||||
client_with_empty_registry.tool_runtime.rag_tool.insert(
|
||||
documents=documents,
|
||||
vector_db_id=actual_vector_db_id,
|
||||
chunk_size_in_tokens=256,
|
||||
)
|
||||
|
||||
files_list = client_with_empty_registry.files.list()
|
||||
assert len(files_list.data) >= 1, "PDF should have been uploaded to Files API"
|
||||
|
||||
pdf_file = None
|
||||
for file in files_list.data:
|
||||
if file.filename and "test-pdf-data-url" in file.filename:
|
||||
pdf_file = file
|
||||
break
|
||||
|
||||
assert pdf_file is not None, "PDF file should be found in Files API"
|
||||
assert pdf_file.bytes == len(sample_pdf), f"File size should match original PDF ({len(sample_pdf)} bytes)"
|
||||
|
||||
file_content = client_with_empty_registry.files.retrieve_content(pdf_file.id)
|
||||
assert file_content.startswith(b"%PDF-"), "Retrieved file should be a valid PDF"
|
||||
|
||||
vector_store_files = client_with_empty_registry.vector_io.openai_list_files_in_vector_store(
|
||||
vector_store_id=actual_vector_db_id
|
||||
)
|
||||
assert len(vector_store_files.data) >= 1, "PDF should be attached to vector store"
|
||||
|
||||
response = client_with_empty_registry.tool_runtime.rag_tool.query(
|
||||
vector_db_ids=[actual_vector_db_id],
|
||||
content="sample title",
|
||||
)
|
||||
|
||||
assert_valid_text_response(response)
|
||||
content_text = " ".join([chunk.text for chunk in response.content]).lower()
|
||||
assert "sample title" in content_text or "title" in content_text
|
||||
|
|
|
@@ -6,16 +6,18 @@
|
|||
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
from unittest.mock import AsyncMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
from openai import AsyncOpenAI
|
||||
from openai import NOT_GIVEN, AsyncOpenAI
|
||||
from openai.types.model import Model as OpenAIModel
|
||||
|
||||
# Import the real Pydantic response types instead of using Mocks
|
||||
from llama_stack.apis.inference import (
|
||||
OpenAIAssistantMessageParam,
|
||||
OpenAIChatCompletion,
|
||||
OpenAIChoice,
|
||||
OpenAICompletion,
|
||||
OpenAIEmbeddingData,
|
||||
OpenAIEmbeddingsResponse,
|
||||
OpenAIEmbeddingUsage,
|
||||
|
@@ -153,24 +155,22 @@ class TestInferenceRecording:
|
|||
|
||||
async def test_recording_mode(self, temp_storage_dir, real_openai_chat_response):
|
||||
"""Test that recording mode captures and stores responses."""
|
||||
|
||||
async def mock_create(*args, **kwargs):
|
||||
return real_openai_chat_response
|
||||
|
||||
temp_storage_dir = temp_storage_dir / "test_recording_mode"
|
||||
with patch("openai.resources.chat.completions.AsyncCompletions.create", side_effect=mock_create):
|
||||
with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
|
||||
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
|
||||
with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
|
||||
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
|
||||
client.chat.completions._post = AsyncMock(return_value=real_openai_chat_response)
|
||||
|
||||
response = await client.chat.completions.create(
|
||||
model="llama3.2:3b",
|
||||
messages=[{"role": "user", "content": "Hello, how are you?"}],
|
||||
temperature=0.7,
|
||||
max_tokens=50,
|
||||
)
|
||||
response = await client.chat.completions.create(
|
||||
model="llama3.2:3b",
|
||||
messages=[{"role": "user", "content": "Hello, how are you?"}],
|
||||
temperature=0.7,
|
||||
max_tokens=50,
|
||||
user=NOT_GIVEN,
|
||||
)
|
||||
|
||||
# Verify the response was returned correctly
|
||||
assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."
|
||||
# Verify the response was returned correctly
|
||||
assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."
|
||||
client.chat.completions._post.assert_called_once()
|
||||
|
||||
# Verify recording was stored
|
||||
storage = ResponseStorage(temp_storage_dir)
|
||||
|
@@ -178,40 +178,74 @@ class TestInferenceRecording:
|
|||
|
||||
async def test_replay_mode(self, temp_storage_dir, real_openai_chat_response):
|
||||
"""Test that replay mode returns stored responses without making real calls."""
|
||||
|
||||
async def mock_create(*args, **kwargs):
|
||||
return real_openai_chat_response
|
||||
|
||||
temp_storage_dir = temp_storage_dir / "test_replay_mode"
|
||||
# First, record a response
|
||||
with patch("openai.resources.chat.completions.AsyncCompletions.create", side_effect=mock_create):
|
||||
with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
|
||||
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
|
||||
with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
|
||||
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
|
||||
client.chat.completions._post = AsyncMock(return_value=real_openai_chat_response)
|
||||
|
||||
response = await client.chat.completions.create(
|
||||
model="llama3.2:3b",
|
||||
messages=[{"role": "user", "content": "Hello, how are you?"}],
|
||||
temperature=0.7,
|
||||
max_tokens=50,
|
||||
)
|
||||
response = await client.chat.completions.create(
|
||||
model="llama3.2:3b",
|
||||
messages=[{"role": "user", "content": "Hello, how are you?"}],
|
||||
temperature=0.7,
|
||||
max_tokens=50,
|
||||
user=NOT_GIVEN,
|
||||
)
|
||||
client.chat.completions._post.assert_called_once()
|
||||
|
||||
# Now test replay mode - should not call the original method
|
||||
with patch("openai.resources.chat.completions.AsyncCompletions.create") as mock_create_patch:
|
||||
with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
|
||||
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
|
||||
with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
|
||||
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
|
||||
client.chat.completions._post = AsyncMock(return_value=real_openai_chat_response)
|
||||
|
||||
response = await client.chat.completions.create(
|
||||
model="llama3.2:3b",
|
||||
messages=[{"role": "user", "content": "Hello, how are you?"}],
|
||||
temperature=0.7,
|
||||
max_tokens=50,
|
||||
)
|
||||
response = await client.chat.completions.create(
|
||||
model="llama3.2:3b",
|
||||
messages=[{"role": "user", "content": "Hello, how are you?"}],
|
||||
temperature=0.7,
|
||||
max_tokens=50,
|
||||
)
|
||||
|
||||
# Verify we got the recorded response
|
||||
assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."
|
||||
# Verify we got the recorded response
|
||||
assert response.choices[0].message.content == "Hello! I'm doing well, thank you for asking."
|
||||
|
||||
# Verify the original method was NOT called
|
||||
mock_create_patch.assert_not_called()
|
||||
# Verify the original method was NOT called
|
||||
client.chat.completions._post.assert_not_called()
|
||||
|
||||
async def test_replay_mode_models(self, temp_storage_dir):
|
||||
"""Test that replay mode returns stored responses without making real model listing calls."""
|
||||
|
||||
async def _async_iterator(models):
|
||||
for model in models:
|
||||
yield model
|
||||
|
||||
models = [
|
||||
OpenAIModel(id="foo", created=1, object="model", owned_by="test"),
|
||||
OpenAIModel(id="bar", created=2, object="model", owned_by="test"),
|
||||
]
|
||||
|
||||
expected_ids = {m.id for m in models}
|
||||
|
||||
temp_storage_dir = temp_storage_dir / "test_replay_mode_models"
|
||||
|
||||
# baseline - mock works without recording
|
||||
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
|
||||
client.models._get_api_list = Mock(return_value=_async_iterator(models))
|
||||
assert {m.id async for m in client.models.list()} == expected_ids
|
||||
client.models._get_api_list.assert_called_once()
|
||||
|
||||
# record the call
|
||||
with inference_recording(mode=InferenceMode.RECORD, storage_dir=temp_storage_dir):
|
||||
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
|
||||
client.models._get_api_list = Mock(return_value=_async_iterator(models))
|
||||
assert {m.id async for m in client.models.list()} == expected_ids
|
||||
client.models._get_api_list.assert_called_once()
|
||||
|
||||
# replay the call
|
||||
with inference_recording(mode=InferenceMode.REPLAY, storage_dir=temp_storage_dir):
|
||||
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
|
||||
client.models._get_api_list = Mock(return_value=_async_iterator(models))
|
||||
assert {m.id async for m in client.models.list()} == expected_ids
|
||||
client.models._get_api_list.assert_not_called()
|
||||
|
||||
async def test_replay_missing_recording(self, temp_storage_dir):
|
||||
"""Test that replay mode fails when no recording is found."""
|
||||
|
@ -228,36 +262,110 @@ class TestInferenceRecording:
|
|||
async def test_embeddings_recording(self, temp_storage_dir, real_embeddings_response):
|
||||
"""Test recording and replay of embeddings calls."""
|
||||
|
||||
async def mock_create(*args, **kwargs):
|
||||
return real_embeddings_response
|
||||
# baseline - mock works without recording
|
||||
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
|
||||
client.embeddings._post = AsyncMock(return_value=real_embeddings_response)
|
||||
response = await client.embeddings.create(
|
||||
model=real_embeddings_response.model,
|
||||
input=["Hello world", "Test embedding"],
|
||||
encoding_format=NOT_GIVEN,
|
||||
)
|
||||
assert len(response.data) == 2
|
||||
assert response.data[0].embedding == [0.1, 0.2, 0.3]
|
||||
client.embeddings._post.assert_called_once()
|
||||
|
||||
temp_storage_dir = temp_storage_dir / "test_embeddings_recording"
|
||||
# Record
|
||||
with patch("openai.resources.embeddings.AsyncEmbeddings.create", side_effect=mock_create):
|
||||
with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
|
||||
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
|
||||
with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
|
||||
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
|
||||
client.embeddings._post = AsyncMock(return_value=real_embeddings_response)
|
||||
|
||||
response = await client.embeddings.create(
|
||||
model="nomic-embed-text", input=["Hello world", "Test embedding"]
|
||||
)
|
||||
response = await client.embeddings.create(
|
||||
model=real_embeddings_response.model,
|
||||
input=["Hello world", "Test embedding"],
|
||||
encoding_format=NOT_GIVEN,
|
||||
dimensions=NOT_GIVEN,
|
||||
user=NOT_GIVEN,
|
||||
)
|
||||
|
||||
assert len(response.data) == 2
|
||||
assert len(response.data) == 2
|
||||
|
||||
# Replay
|
||||
with patch("openai.resources.embeddings.AsyncEmbeddings.create") as mock_create_patch:
|
||||
with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
|
||||
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
|
||||
with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
|
||||
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
|
||||
client.embeddings._post = AsyncMock(return_value=real_embeddings_response)
|
||||
|
||||
response = await client.embeddings.create(
|
||||
model="nomic-embed-text", input=["Hello world", "Test embedding"]
|
||||
)
|
||||
response = await client.embeddings.create(
|
||||
model=real_embeddings_response.model,
|
||||
input=["Hello world", "Test embedding"],
|
||||
)
|
||||
|
||||
# Verify we got the recorded response
|
||||
assert len(response.data) == 2
|
||||
assert response.data[0].embedding == [0.1, 0.2, 0.3]
|
||||
# Verify we got the recorded response
|
||||
assert len(response.data) == 2
|
||||
assert response.data[0].embedding == [0.1, 0.2, 0.3]
|
||||
|
||||
# Verify original method was not called
|
||||
mock_create_patch.assert_not_called()
|
||||
# Verify original method was not called
|
||||
client.embeddings._post.assert_not_called()
|
||||
|
||||
async def test_completions_recording(self, temp_storage_dir):
|
||||
real_completions_response = OpenAICompletion(
|
||||
id="test_completion",
|
||||
object="text_completion",
|
||||
created=1234567890,
|
||||
model="llama3.2:3b",
|
||||
choices=[
|
||||
{
|
||||
"text": "Hello! I'm doing well, thank you for asking.",
|
||||
"index": 0,
|
||||
"logprobs": None,
|
||||
"finish_reason": "stop",
|
||||
}
|
||||
],
|
||||
)
|
||||
|
||||
temp_storage_dir = temp_storage_dir / "test_completions_recording"
|
||||
|
||||
# baseline - mock works without recording
|
||||
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
|
||||
client.completions._post = AsyncMock(return_value=real_completions_response)
|
||||
response = await client.completions.create(
|
||||
model=real_completions_response.model,
|
||||
prompt="Hello, how are you?",
|
||||
temperature=0.7,
|
||||
max_tokens=50,
|
||||
user=NOT_GIVEN,
|
||||
)
|
||||
assert response.choices[0].text == real_completions_response.choices[0].text
|
||||
client.completions._post.assert_called_once()
|
||||
|
||||
# Record
|
||||
with inference_recording(mode=InferenceMode.RECORD, storage_dir=str(temp_storage_dir)):
|
||||
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
|
||||
client.completions._post = AsyncMock(return_value=real_completions_response)
|
||||
|
||||
response = await client.completions.create(
|
||||
model=real_completions_response.model,
|
||||
prompt="Hello, how are you?",
|
||||
temperature=0.7,
|
||||
max_tokens=50,
|
||||
user=NOT_GIVEN,
|
||||
)
|
||||
|
||||
assert response.choices[0].text == real_completions_response.choices[0].text
|
||||
client.completions._post.assert_called_once()
|
||||
|
||||
# Replay
|
||||
with inference_recording(mode=InferenceMode.REPLAY, storage_dir=str(temp_storage_dir)):
|
||||
client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
|
||||
client.completions._post = AsyncMock(return_value=real_completions_response)
|
||||
response = await client.completions.create(
|
||||
model=real_completions_response.model,
|
||||
prompt="Hello, how are you?",
|
||||
temperature=0.7,
|
||||
max_tokens=50,
|
||||
)
|
||||
assert response.choices[0].text == real_completions_response.choices[0].text
|
||||
client.completions._post.assert_not_called()
|
||||
|
||||
async def test_live_mode(self, real_openai_chat_response):
|
||||
"""Test that live mode passes through to original methods."""
|
||||
|
|
|
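For readers unfamiliar with the recording harness these tests exercise, the flow is: run once in RECORD mode against a live endpoint so responses are persisted under storage_dir, then run in REPLAY mode so identical calls are answered from disk. The sketch below is illustrative only; it reuses the inference_recording() context manager and InferenceMode enum seen in the tests above, whose import path is not shown in this hunk.

# Illustrative sketch only - not part of this diff. inference_recording() and
# InferenceMode are the helpers used by the tests above; their import path is
# not shown in this hunk, so it is omitted here.
from openai import AsyncOpenAI

async def record_then_replay(storage_dir: str) -> None:
    # RECORD: exercise the client once against a live server; responses are stored on disk.
    with inference_recording(mode=InferenceMode.RECORD, storage_dir=storage_dir):
        client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
        await client.chat.completions.create(
            model="llama3.2:3b",
            messages=[{"role": "user", "content": "Hello, how are you?"}],
        )

    # REPLAY: identical calls are now served from the recording; no network traffic occurs.
    with inference_recording(mode=InferenceMode.REPLAY, storage_dir=storage_dir):
        client = AsyncOpenAI(base_url="http://localhost:11434/v1", api_key="test")
        await client.chat.completions.create(
            model="llama3.2:3b",
            messages=[{"role": "user", "content": "Hello, how are you?"}],
        )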
@ -6,19 +6,15 @@
import asyncio
import json
import logging # allow-direct-logging
import threading
import time
from http.server import BaseHTTPRequestHandler, HTTPServer
from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch
from unittest.mock import AsyncMock, MagicMock, PropertyMock, patch
import pytest
from openai.types.chat.chat_completion_chunk import (
ChatCompletionChunk as OpenAIChatCompletionChunk,
)
from openai.types.chat.chat_completion_chunk import (
Choice as OpenAIChoice,
Choice as OpenAIChoiceChunk,
)
from openai.types.chat.chat_completion_chunk import (
ChoiceDelta as OpenAIChoiceDelta,

@ -35,6 +31,9 @@ from llama_stack.apis.inference import (
ChatCompletionRequest,
ChatCompletionResponseEventType,
CompletionMessage,
OpenAIAssistantMessageParam,
OpenAIChatCompletion,
OpenAIChoice,
SystemMessage,
ToolChoice,
ToolConfig,

@ -61,41 +60,6 @@ from llama_stack.providers.remote.inference.vllm.vllm import (
# -v -s --tb=short --disable-warnings

class MockInferenceAdapterWithSleep:
def __init__(self, sleep_time: int, response: dict[str, Any]):
self.httpd = None
class DelayedRequestHandler(BaseHTTPRequestHandler):
# ruff: noqa: N802
def do_POST(self):
time.sleep(sleep_time)
response_body = json.dumps(response).encode("utf-8")
self.send_response(code=200)
self.send_header("Content-Type", "application/json")
self.send_header("Content-Length", len(response_body))
self.end_headers()
self.wfile.write(response_body)
self.request_handler = DelayedRequestHandler
def __enter__(self):
httpd = HTTPServer(("", 0), self.request_handler)
self.httpd = httpd
host, port = httpd.server_address
httpd_thread = threading.Thread(target=httpd.serve_forever)
httpd_thread.daemon = True # stop server if this thread terminates
httpd_thread.start()
config = VLLMInferenceAdapterConfig(url=f"http://{host}:{port}")
inference_adapter = VLLMInferenceAdapter(config)
return inference_adapter
def __exit__(self, _exc_type, _exc_value, _traceback):
if self.httpd:
self.httpd.shutdown()
self.httpd.server_close()

@pytest.fixture(scope="module")
def mock_openai_models_list():
with patch("openai.resources.models.AsyncModels.list", new_callable=AsyncMock) as mock_list:

@ -150,10 +114,12 @@ async def test_tool_call_response(vllm_inference_adapter):
"""Verify that tool call arguments from a CompletionMessage are correctly converted
into the expected JSON format."""
# Patch the call to vllm so we can inspect the arguments sent were correct
with patch.object(
vllm_inference_adapter.client.chat.completions, "create", new_callable=AsyncMock
) as mock_nonstream_completion:
# Patch the client property to avoid instantiating a real AsyncOpenAI client
with patch.object(VLLMInferenceAdapter, "client", new_callable=PropertyMock) as mock_create_client:
mock_client = MagicMock()
mock_client.chat.completions.create = AsyncMock()
mock_create_client.return_value = mock_client
messages = [
SystemMessage(content="You are a helpful assistant"),
UserMessage(content="How many?"),

@ -179,7 +145,7 @@ async def test_tool_call_response(vllm_inference_adapter):
tool_config=ToolConfig(tool_choice=ToolChoice.auto),
)
assert mock_nonstream_completion.call_args.kwargs["messages"][2]["tool_calls"] == [
assert mock_client.chat.completions.create.call_args.kwargs["messages"][2]["tool_calls"] == [
{
"id": "foo",
"type": "function",

@ -199,7 +165,7 @@ async def test_tool_call_delta_empty_tool_call_buf():
async def mock_stream():
delta = OpenAIChoiceDelta(content="", tool_calls=None)
choices = [OpenAIChoice(delta=delta, finish_reason="stop", index=0)]
choices = [OpenAIChoiceChunk(delta=delta, finish_reason="stop", index=0)]
mock_chunk = OpenAIChatCompletionChunk(
id="chunk-1",
created=1,

@ -225,7 +191,7 @@ async def test_tool_call_delta_streaming_arguments_dict():
model="foo",
object="chat.completion.chunk",
choices=[
OpenAIChoice(
OpenAIChoiceChunk(
delta=OpenAIChoiceDelta(
content="",
tool_calls=[

@ -250,7 +216,7 @@ async def test_tool_call_delta_streaming_arguments_dict():
model="foo",
object="chat.completion.chunk",
choices=[
OpenAIChoice(
OpenAIChoiceChunk(
delta=OpenAIChoiceDelta(
content="",
tool_calls=[

@ -275,7 +241,9 @@ async def test_tool_call_delta_streaming_arguments_dict():
model="foo",
object="chat.completion.chunk",
choices=[
OpenAIChoice(delta=OpenAIChoiceDelta(content="", tool_calls=None), finish_reason="tool_calls", index=0)
OpenAIChoiceChunk(
delta=OpenAIChoiceDelta(content="", tool_calls=None), finish_reason="tool_calls", index=0
)
],
)
for chunk in [mock_chunk_1, mock_chunk_2, mock_chunk_3]:

@ -299,7 +267,7 @@ async def test_multiple_tool_calls():
model="foo",
object="chat.completion.chunk",
choices=[
OpenAIChoice(
OpenAIChoiceChunk(
delta=OpenAIChoiceDelta(
content="",
tool_calls=[

@ -324,7 +292,7 @@ async def test_multiple_tool_calls():
model="foo",
object="chat.completion.chunk",
choices=[
OpenAIChoice(
OpenAIChoiceChunk(
delta=OpenAIChoiceDelta(
content="",
tool_calls=[

@ -349,7 +317,9 @@ async def test_multiple_tool_calls():
model="foo",
object="chat.completion.chunk",
choices=[
OpenAIChoice(delta=OpenAIChoiceDelta(content="", tool_calls=None), finish_reason="tool_calls", index=0)
OpenAIChoiceChunk(
delta=OpenAIChoiceDelta(content="", tool_calls=None), finish_reason="tool_calls", index=0
)
],
)
for chunk in [mock_chunk_1, mock_chunk_2, mock_chunk_3]:

@ -393,59 +363,6 @@ async def test_process_vllm_chat_completion_stream_response_no_choices():
assert chunks[0].event.event_type.value == "start"

@pytest.mark.allow_network
def test_chat_completion_doesnt_block_event_loop(caplog):
loop = asyncio.new_event_loop()
loop.set_debug(True)
caplog.set_level(logging.WARNING)
# Log when event loop is blocked for more than 200ms
loop.slow_callback_duration = 0.5
# Sleep for 500ms in our delayed http response
sleep_time = 0.5
mock_model = Model(identifier="mock-model", provider_resource_id="mock-model", provider_id="vllm-inference")
mock_response = {
"id": "chatcmpl-abc123",
"object": "chat.completion",
"created": 1,
"modle": "mock-model",
"choices": [
{
"message": {"content": ""},
"logprobs": None,
"finish_reason": "stop",
"index": 0,
}
],
}
async def do_chat_completion():
await inference_adapter.chat_completion(
"mock-model",
[],
stream=False,
tools=None,
tool_config=ToolConfig(tool_choice=ToolChoice.auto),
)
with MockInferenceAdapterWithSleep(sleep_time, mock_response) as inference_adapter:
inference_adapter.model_store = AsyncMock()
inference_adapter.model_store.get_model.return_value = mock_model
loop.run_until_complete(inference_adapter.initialize())
# Clear the logs so far and run the actual chat completion we care about
caplog.clear()
loop.run_until_complete(do_chat_completion())
# Ensure we don't have any asyncio warnings in the captured log
# records from our chat completion call. A message gets logged
# here any time we exceed the slow_callback_duration configured
# above.
asyncio_warnings = [record.message for record in caplog.records if record.name == "asyncio"]
assert not asyncio_warnings

async def test_get_params_empty_tools(vllm_inference_adapter):
request = ChatCompletionRequest(
tools=[],

@ -641,9 +558,7 @@ async def test_health_status_success(vllm_inference_adapter):
This test verifies that the health method returns a HealthResponse with status OK, only
when the connection to the vLLM server is successful.
"""
# Set vllm_inference_adapter.client to None to ensure _create_client is called
vllm_inference_adapter.client = None
with patch.object(vllm_inference_adapter, "_create_client") as mock_create_client:
with patch.object(VLLMInferenceAdapter, "client", new_callable=PropertyMock) as mock_create_client:
# Create mock client and models
mock_client = MagicMock()
mock_models = MagicMock()

@ -674,8 +589,7 @@ async def test_health_status_failure(vllm_inference_adapter):
This test verifies that the health method returns a HealthResponse with status ERROR
and an appropriate error message when the connection to the vLLM server fails.
"""
vllm_inference_adapter.client = None
with patch.object(vllm_inference_adapter, "_create_client") as mock_create_client:
with patch.object(VLLMInferenceAdapter, "client", new_callable=PropertyMock) as mock_create_client:
# Create mock client and models
mock_client = MagicMock()
mock_models = MagicMock()

@ -697,3 +611,48 @@ async def test_health_status_failure(vllm_inference_adapter):
assert "Health check failed: Connection failed" in health_response["message"]
mock_models.list.assert_called_once()

async def test_openai_chat_completion_is_async(vllm_inference_adapter):
"""
Verify that openai_chat_completion is async and doesn't block the event loop.
To do this we mock the underlying inference with a sleep, start multiple
inference calls in parallel, and ensure the total time taken is less
than the sum of the individual sleep times.
"""
sleep_time = 0.5
async def mock_create(*args, **kwargs):
await asyncio.sleep(sleep_time)
return OpenAIChatCompletion(
id="chatcmpl-abc123",
created=1,
model="mock-model",
choices=[
OpenAIChoice(
message=OpenAIAssistantMessageParam(
content="nothing interesting",
),
finish_reason="stop",
index=0,
)
],
)
async def do_inference():
await vllm_inference_adapter.openai_chat_completion(
"mock-model", messages=["one fish", "two fish"], stream=False
)
with patch.object(VLLMInferenceAdapter, "client", new_callable=PropertyMock) as mock_create_client:
mock_client = MagicMock()
mock_client.chat.completions.create = AsyncMock(side_effect=mock_create)
mock_create_client.return_value = mock_client
start_time = time.time()
await asyncio.gather(do_inference(), do_inference(), do_inference(), do_inference())
total_time = time.time() - start_time
assert mock_create_client.call_count == 4 # no cheating
assert total_time < (sleep_time * 2), f"Total time taken: {total_time}s exceeded expected max"
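The recurring change in this file is how the adapter's OpenAI client is mocked: instead of setting vllm_inference_adapter.client = None and patching _create_client, the tests now patch the client property on the class with PropertyMock. A minimal, self-contained illustration of that technique follows; the Adapter class here is a made-up stand-in for VLLMInferenceAdapter, not code from this repository.

# Illustration of the PropertyMock technique used above, with a hypothetical Adapter
# class standing in for VLLMInferenceAdapter.
from unittest.mock import MagicMock, PropertyMock, patch

class Adapter:
    @property
    def client(self):
        raise RuntimeError("would construct a real AsyncOpenAI client")

def demo() -> None:
    adapter = Adapter()
    # Patching the property on the class means every instance sees the mock,
    # so no real client is ever constructed.
    with patch.object(Adapter, "client", new_callable=PropertyMock) as mock_client_prop:
        mock_client_prop.return_value = MagicMock()
        assert adapter.client is mock_client_prop.return_value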
53
tests/unit/providers/test_bedrock.py
Normal file

@ -0,0 +1,53 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.providers.remote.inference.bedrock.bedrock import (
_get_region_prefix,
_to_inference_profile_id,
)

def test_region_prefixes():
assert _get_region_prefix("us-east-1") == "us."
assert _get_region_prefix("eu-west-1") == "eu."
assert _get_region_prefix("ap-south-1") == "ap."
assert _get_region_prefix("ca-central-1") == "us."
# Test case insensitive
assert _get_region_prefix("US-EAST-1") == "us."
assert _get_region_prefix("EU-WEST-1") == "eu."
assert _get_region_prefix("Ap-South-1") == "ap."
# Test None region
assert _get_region_prefix(None) == "us."

def test_model_id_conversion():
# Basic conversion
assert (
_to_inference_profile_id("meta.llama3-1-70b-instruct-v1:0", "us-east-1") == "us.meta.llama3-1-70b-instruct-v1:0"
)
# Already has prefix
assert (
_to_inference_profile_id("us.meta.llama3-1-70b-instruct-v1:0", "us-east-1")
== "us.meta.llama3-1-70b-instruct-v1:0"
)
# ARN should be returned unchanged
arn = "arn:aws:bedrock:us-east-1:123456789012:inference-profile/us.meta.llama3-1-70b-instruct-v1:0"
assert _to_inference_profile_id(arn, "us-east-1") == arn
# ARN should be returned unchanged even without region
assert _to_inference_profile_id(arn) == arn
# Optional region parameter defaults to us-east-1
assert _to_inference_profile_id("meta.llama3-1-70b-instruct-v1:0") == "us.meta.llama3-1-70b-instruct-v1:0"
# Different regions work with optional parameter
assert (
_to_inference_profile_id("meta.llama3-1-70b-instruct-v1:0", "eu-west-1") == "eu.meta.llama3-1-70b-instruct-v1:0"
)
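The new test file pins down the behaviour of the Bedrock helpers without showing their implementation. A sketch consistent with the assertions above, written here purely for illustration and not taken from bedrock.py, looks roughly like this:

# Sketch only - behaviour inferred from the assertions above, not the actual
# implementation in llama_stack/providers/remote/inference/bedrock/bedrock.py.
def to_inference_profile_id_sketch(model_id: str, region: str | None = "us-east-1") -> str:
    # ARNs are already fully qualified and pass through untouched.
    if model_id.startswith("arn:"):
        return model_id
    # IDs that already carry a region prefix are left alone.
    if model_id.startswith(("us.", "eu.", "ap.")):
        return model_id
    # Otherwise derive the prefix from the region, case-insensitively,
    # defaulting to "us." for unknown or missing regions.
    prefix = "us."
    if region:
        lowered = region.lower()
        if lowered.startswith("eu-"):
            prefix = "eu."
        elif lowered.startswith("ap-"):
            prefix = "ap."
    return prefix + model_id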
@ -178,3 +178,41 @@ def test_content_from_data_and_mime_type_both_encodings_fail():
# Should raise an exception instead of returning empty string
with pytest.raises(UnicodeDecodeError):
content_from_data_and_mime_type(data, mime_type)

async def test_memory_tool_error_handling():
"""Test that memory tool handles various failures gracefully without crashing."""
from llama_stack.providers.inline.tool_runtime.rag.config import RagToolRuntimeConfig
from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRuntimeImpl
config = RagToolRuntimeConfig()
memory_tool = MemoryToolRuntimeImpl(
config=config,
vector_io_api=AsyncMock(),
inference_api=AsyncMock(),
files_api=AsyncMock(),
)
docs = [
RAGDocument(document_id="good_doc", content="Good content", metadata={}),
RAGDocument(document_id="bad_url_doc", content=URL(uri="https://bad.url"), metadata={}),
RAGDocument(document_id="another_good_doc", content="Another good content", metadata={}),
]
mock_file1 = MagicMock()
mock_file1.id = "file_good1"
mock_file2 = MagicMock()
mock_file2.id = "file_good2"
memory_tool.files_api.openai_upload_file.side_effect = [mock_file1, mock_file2]
with patch("httpx.AsyncClient") as mock_client:
mock_instance = AsyncMock()
mock_instance.get.side_effect = Exception("Bad URL")
mock_client.return_value.__aenter__.return_value = mock_instance
# won't raise exception despite one document failing
await memory_tool.insert(docs, "vector_store_123")
# processed 2 documents successfully, skipped 1
assert memory_tool.files_api.openai_upload_file.call_count == 2
assert memory_tool.vector_io_api.openai_attach_file_to_vector_store.call_count == 2
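The assertion pattern above (three documents in, two uploads out, no exception raised) implies per-document error isolation inside insert(). As an illustration of that pattern only, with process_one and log as hypothetical stand-ins rather than the real MemoryToolRuntimeImpl API:

# Illustrative pattern only; process_one and log are hypothetical stand-ins.
async def insert_documents_sketch(docs, process_one, log) -> int:
    inserted = 0
    for doc in docs:
        try:
            await process_one(doc)  # e.g. fetch URL content, upload, attach to the vector store
            inserted += 1
        except Exception as exc:  # deliberately broad: skip and log, never crash the batch
            log(f"Failed to insert document {doc.document_id}: {exc}")
    return inserted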
@ -26,9 +26,9 @@ def test_generate_chunk_id():
chunk_ids = sorted([chunk.chunk_id for chunk in chunks])
assert chunk_ids == [
"177a1368-f6a8-0c50-6e92-18677f2c3de3",
"bc744db3-1b25-0a9c-cdff-b6ba3df73c36",
"f68df25d-d9aa-ab4d-5684-64a233add20d",
"31d1f9a3-c8d2-66e7-3c37-af2acd329778",
"d07dade7-29c0-cda7-df29-0249a1dcbc3e",
"d14f75a1-5855-7f72-2c78-d9fc4275a346",
]

@ -36,14 +36,14 @@ def test_generate_chunk_id_with_window():
chunk = Chunk(content="test", metadata={"document_id": "doc-1"})
chunk_id1 = generate_chunk_id("doc-1", chunk, chunk_window="0-1")
chunk_id2 = generate_chunk_id("doc-1", chunk, chunk_window="1-2")
assert chunk_id1 == "149018fe-d0eb-0f8d-5f7f-726bdd2aeedb"
assert chunk_id2 == "4562c1ee-9971-1f3b-51a6-7d05e5211154"
assert chunk_id1 == "8630321a-d9cb-2bb6-cd28-ebf68dafd866"
assert chunk_id2 == "13a1c09a-cbda-b61a-2d1a-7baa90888685"

def test_chunk_id():
# Test with existing chunk ID
chunk_with_id = Chunk(content="test", metadata={"document_id": "existing-id"})
assert chunk_with_id.chunk_id == "84ededcc-b80b-a83e-1a20-ca6515a11350"
assert chunk_with_id.chunk_id == "11704f92-42b6-61df-bf85-6473e7708fbd"
# Test with document ID in metadata
chunk_with_doc_id = Chunk(content="test", metadata={"document_id": "doc-1"})
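The expected chunk IDs change wholesale in this hunk, which is what you would expect when the input to (or construction of) a content-derived hash changes. The actual generate_chunk_id scheme is not shown here; purely as an illustration of the general technique, deterministic UUID-shaped IDs can be derived like this:

# Illustrative only - not the actual generate_chunk_id implementation.
import hashlib
import uuid

def deterministic_chunk_id_sketch(document_id: str, content: str, window: str | None = None) -> str:
    key = f"{document_id}:{content}" + (f":{window}" if window else "")
    # md5 yields 32 hex characters, exactly what uuid.UUID accepts as a hex string.
    return str(uuid.UUID(hashlib.md5(key.encode()).hexdigest()))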
@ -65,6 +65,9 @@ async def test_inference_store_pagination_basic():
input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
await store.store_chat_completion(completion, input_messages)
# Wait for all queued writes to complete
await store.flush()
# Test 1: First page with limit=2, descending order (default)
result = await store.list_chat_completions(limit=2, order=Order.desc)
assert len(result.data) == 2

@ -108,6 +111,9 @@ async def test_inference_store_pagination_ascending():
input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
await store.store_chat_completion(completion, input_messages)
# Wait for all queued writes to complete
await store.flush()
# Test ascending order pagination
result = await store.list_chat_completions(limit=1, order=Order.asc)
assert len(result.data) == 1

@ -143,6 +149,9 @@ async def test_inference_store_pagination_with_model_filter():
input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
await store.store_chat_completion(completion, input_messages)
# Wait for all queued writes to complete
await store.flush()
# Test pagination with model filter
result = await store.list_chat_completions(limit=1, model="model-a", order=Order.desc)
assert len(result.data) == 1

@ -190,6 +199,9 @@ async def test_inference_store_pagination_no_limit():
input_messages = [OpenAIUserMessageParam(role="user", content=f"Test message for {completion_id}")]
await store.store_chat_completion(completion, input_messages)
# Wait for all queued writes to complete
await store.flush()
# Test without limit
result = await store.list_chat_completions(order=Order.desc)
assert len(result.data) == 2
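Every pagination test gains the same two lines: the store apparently queues writes rather than committing them inline, so the tests must drain the queue with store.flush() before asserting on list_chat_completions(). A minimal sketch of that queued-writer shape (mine, not the real InferenceStore) is:

# Minimal sketch of a queued writer with flush(); construct it inside a running event loop.
import asyncio

class QueuedWriterSketch:
    def __init__(self) -> None:
        self._queue: asyncio.Queue = asyncio.Queue()
        self.written: list = []
        self._worker = asyncio.create_task(self._drain())

    async def _drain(self) -> None:
        while True:
            item = await self._queue.get()
            self.written.append(item)  # stand-in for the real database write
            self._queue.task_done()

    async def store(self, item) -> None:
        await self._queue.put(item)  # returns before the write has landed

    async def flush(self) -> None:
        await self._queue.join()  # wait for all queued writes to complete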
27
uv.lock
generated

@ -1,5 +1,5 @@
version = 1
revision = 3
revision = 2
requires-python = ">=3.12"
resolution-markers = [
"(python_full_version >= '3.13' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.13' and sys_platform != 'darwin' and sys_platform != 'linux')",

@ -1839,7 +1839,6 @@ test = [
{ name = "datasets" },
{ name = "mcp" },
{ name = "milvus-lite" },
{ name = "openai" },
{ name = "psycopg2-binary" },
{ name = "pymilvus" },
{ name = "pypdf" },

@ -1865,7 +1864,6 @@ unit = [
{ name = "milvus-lite" },
{ name = "moto", extra = ["s3"] },
{ name = "ollama" },
{ name = "openai" },
{ name = "psycopg2-binary" },
{ name = "pymilvus" },
{ name = "pypdf" },

@ -1889,7 +1887,7 @@ requires-dist = [
{ name = "jsonschema" },
{ name = "llama-stack-client", specifier = ">=0.2.21" },
{ name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.21" },
{ name = "openai", specifier = ">=1.99.6" },
{ name = "openai", specifier = ">=1.100.0" },
{ name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
{ name = "opentelemetry-sdk", specifier = ">=1.30.0" },
{ name = "pandas", marker = "extra == 'ui'" },

@ -1959,7 +1957,6 @@ test = [
{ name = "datasets", specifier = ">=4.0.0" },
{ name = "mcp" },
{ name = "milvus-lite", specifier = ">=2.5.0" },
{ name = "openai", specifier = ">=1.100.0" },
{ name = "psycopg2-binary", specifier = ">=2.9.0" },
{ name = "pymilvus", specifier = ">=2.6.1" },
{ name = "pypdf" },

@ -1984,7 +1981,6 @@ unit = [
{ name = "milvus-lite", specifier = ">=2.5.0" },
{ name = "moto", extras = ["s3"], specifier = ">=5.1.10" },
{ name = "ollama" },
{ name = "openai" },
{ name = "psycopg2-binary", specifier = ">=2.9.0" },
{ name = "pymilvus", specifier = ">=2.6.1" },
{ name = "pypdf" },

@ -2023,7 +2019,7 @@ wheels = [

[[package]]
name = "locust"
version = "2.39.1"
version = "2.40.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "configargparse" },

@ -2035,6 +2031,7 @@ dependencies = [
{ name = "locust-cloud" },
{ name = "msgpack" },
{ name = "psutil" },
{ name = "pytest" },
{ name = "python-engineio" },
{ name = "python-socketio", extra = ["client"] },
{ name = "pywin32", marker = "sys_platform == 'win32'" },

@ -2043,9 +2040,9 @@ dependencies = [
{ name = "setuptools" },
{ name = "werkzeug" },
]
sdist = { url = "https://files.pythonhosted.org/packages/95/c8/10aa5445c404eed389b56877e6714c1787190cc09dd70059ce3765979ec5/locust-2.39.1.tar.gz", hash = "sha256:6bdd19e27edf9a1c84391d6cf6e9a737dfb832be7dfbf39053191ae31b9cc498", size = 1409902, upload-time = "2025-08-29T17:41:01.544Z" }
sdist = { url = "https://files.pythonhosted.org/packages/01/22/82f40176473a98c9479bed667d3ad21bb859d2cb67f6880a6b0b6a725e45/locust-2.40.1.tar.gz", hash = "sha256:5bde76c1cf7e412071670f926f34844e119210c93f07a4cf9fc4cb93c60a578a", size = 1411606, upload-time = "2025-09-05T15:57:35.76Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ec/b3/b2f4b2ca88b1e72eba7be2b2982533b887f8b709d222db78eb9602aa5121/locust-2.39.1-py3-none-any.whl", hash = "sha256:fd5148f2f1a4ed34aee968abc4393674e69d1b5e1b54db50a397f6eb09ce0b04", size = 1428155, upload-time = "2025-08-29T17:41:00.245Z" },
{ url = "https://files.pythonhosted.org/packages/3b/e6/9c6335ab16becf4f8ad3da6083ab78793c56ec1ca496d6f7c74660c21c3f/locust-2.40.1-py3-none-any.whl", hash = "sha256:ef0517f9bb5ed0afa7035014faaf944802917e07da8649461aaaf5e5f3ba8a65", size = 1430154, upload-time = "2025-09-05T15:57:33.233Z" },
]

[[package]]

@ -2619,7 +2616,7 @@ wheels = [

[[package]]
name = "openai"
version = "1.102.0"
version = "1.107.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },

@ -2631,9 +2628,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/07/55/da5598ed5c6bdd9939633854049cddc5cbac0da938dfcfcb3c6b119c16c0/openai-1.102.0.tar.gz", hash = "sha256:2e0153bcd64a6523071e90211cbfca1f2bbc5ceedd0993ba932a5869f93b7fc9", size = 519027, upload-time = "2025-08-26T20:50:29.397Z" }
sdist = { url = "https://files.pythonhosted.org/packages/88/67/d6498de300f83ff57a79cb7aa96ef3bef8d6f070c3ded0f1b5b45442a6bc/openai-1.107.0.tar.gz", hash = "sha256:43e04927584e57d0e9e640ee0077c78baf8150098be96ebd5c512539b6c4e9a4", size = 566056, upload-time = "2025-09-08T19:25:47.604Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/bd/0d/c9e7016d82c53c5b5e23e2bad36daebb8921ed44f69c0a985c6529a35106/openai-1.102.0-py3-none-any.whl", hash = "sha256:d751a7e95e222b5325306362ad02a7aa96e1fab3ed05b5888ce1c7ca63451345", size = 812015, upload-time = "2025-08-26T20:50:27.219Z" },
{ url = "https://files.pythonhosted.org/packages/91/ed/e8a4fd20390f2858b95227c288df8fe0c835f7c77625f7583609161684ba/openai-1.107.0-py3-none-any.whl", hash = "sha256:3dcfa3cbb116bd6924b27913b8da28c4a787379ff60049588547a1013e6d6438", size = 950968, upload-time = "2025-09-08T19:25:45.552Z" },
]

[[package]]

@ -3540,7 +3537,7 @@ wheels = [

[[package]]
name = "pytest"
version = "8.4.1"
version = "8.4.2"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },

@ -3549,9 +3546,9 @@ dependencies = [
{ name = "pluggy" },
{ name = "pygments" },
]
sdist = { url = "https://files.pythonhosted.org/packages/08/ba/45911d754e8eba3d5a841a5ce61a65a685ff1798421ac054f85aa8747dfb/pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c", size = 1517714, upload-time = "2025-06-18T05:48:06.109Z" }
sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474, upload-time = "2025-06-18T05:48:03.955Z" },
{ url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" },
]

[[package]]