diff --git a/docs/source/distributions/k8s-benchmark/README.md b/benchmarking/k8s-benchmark/README.md
similarity index 98%
rename from docs/source/distributions/k8s-benchmark/README.md
rename to benchmarking/k8s-benchmark/README.md
index 42da4d466..3b0d0c4db 100644
--- a/docs/source/distributions/k8s-benchmark/README.md
+++ b/benchmarking/k8s-benchmark/README.md
@@ -34,13 +34,12 @@ This data enables data-driven architectural decisions and performance optimizati
 
 **1. Deploy base k8s infrastructure:**
 ```bash
-cd ../k8s
+cd ../../docs/source/distributions/k8s
 ./apply.sh
 ```
 
 **2. Deploy benchmark components:**
 ```bash
-cd ../k8s-benchmark
 ./apply.sh
 ```
 
@@ -56,7 +55,6 @@ kubectl get pods
 
 **Benchmark Llama Stack (default):**
 ```bash
-cd docs/source/distributions/k8s-benchmark/
 ./run-benchmark.sh
 ```
 
diff --git a/docs/source/distributions/k8s-benchmark/apply.sh b/benchmarking/k8s-benchmark/apply.sh
similarity index 100%
rename from docs/source/distributions/k8s-benchmark/apply.sh
rename to benchmarking/k8s-benchmark/apply.sh
diff --git a/docs/source/distributions/k8s-benchmark/benchmark.py b/benchmarking/k8s-benchmark/benchmark.py
similarity index 80%
rename from docs/source/distributions/k8s-benchmark/benchmark.py
rename to benchmarking/k8s-benchmark/benchmark.py
index 83ba9602a..d5e34aa23 100644
--- a/docs/source/distributions/k8s-benchmark/benchmark.py
+++ b/benchmarking/k8s-benchmark/benchmark.py
@@ -14,7 +14,7 @@ import os
 import random
 import statistics
 import time
-from typing import Tuple
+
 
 import aiohttp
 
@@ -55,50 +55,50 @@ class BenchmarkStats:
         total_time = self.end_time - self.start_time
         success_rate = (self.success_count / self.total_requests) * 100
-
-        print(f"\n{'='*60}")
-        print(f"BENCHMARK RESULTS")
-
-        print(f"\nResponse Time Statistics:")
+
+        print(f"\n{'=' * 60}")
+        print("BENCHMARK RESULTS")
+
+        print("\nResponse Time Statistics:")
         print(f" Mean: {statistics.mean(self.response_times):.3f}s")
         print(f" Median: {statistics.median(self.response_times):.3f}s")
         print(f" Min: {min(self.response_times):.3f}s")
         print(f" Max: {max(self.response_times):.3f}s")
-
+
         if len(self.response_times) > 1:
             print(f" Std Dev: {statistics.stdev(self.response_times):.3f}s")
-
+
         percentiles = [50, 90, 95, 99]
         sorted_times = sorted(self.response_times)
-        print(f"\nPercentiles:")
+        print("\nPercentiles:")
         for p in percentiles:
             idx = int(len(sorted_times) * p / 100) - 1
             idx = max(0, min(idx, len(sorted_times) - 1))
             print(f" P{p}: {sorted_times[idx]:.3f}s")
-
+
         if self.ttft_times:
-            print(f"\nTime to First Token (TTFT) Statistics:")
+            print("\nTime to First Token (TTFT) Statistics:")
             print(f" Mean: {statistics.mean(self.ttft_times):.3f}s")
             print(f" Median: {statistics.median(self.ttft_times):.3f}s")
             print(f" Min: {min(self.ttft_times):.3f}s")
             print(f" Max: {max(self.ttft_times):.3f}s")
-
+
             if len(self.ttft_times) > 1:
                 print(f" Std Dev: {statistics.stdev(self.ttft_times):.3f}s")
-
+
             sorted_ttft = sorted(self.ttft_times)
-            print(f"\nTTFT Percentiles:")
+            print("\nTTFT Percentiles:")
             for p in percentiles:
                 idx = int(len(sorted_ttft) * p / 100) - 1
                 idx = max(0, min(idx, len(sorted_ttft) - 1))
                 print(f" P{p}: {sorted_ttft[idx]:.3f}s")
-
+
         if self.chunks_received:
-            print(f"\nStreaming Statistics:")
+            print("\nStreaming Statistics:")
             print(f" Mean chunks per response: {statistics.mean(self.chunks_received):.1f}")
             print(f" Total chunks received: {sum(self.chunks_received)}")
-
-        print(f"{'='*60}")
+
+        print(f"{'=' * 60}")
         print(f"Total time: {total_time:.2f}s")
         print(f"Concurrent users: {self.concurrent_users}")
         print(f"Total requests: {self.total_requests}")
@@ -106,16 +106,16 @@ class BenchmarkStats:
         print(f"Failed requests: {len(self.errors)}")
         print(f"Success rate: {success_rate:.1f}%")
         print(f"Requests per second: {self.success_count / total_time:.2f}")
-
+
         if self.errors:
-            print(f"\nErrors (showing first 5):")
+            print("\nErrors (showing first 5):")
             for error in self.errors[:5]:
                 print(f" {error}")
 
 
 class LlamaStackBenchmark:
     def __init__(self, base_url: str, model_id: str):
-        self.base_url = base_url.rstrip('/')
+        self.base_url = base_url.rstrip("/")
         self.model_id = model_id
         self.headers = {"Content-Type": "application/json"}
         self.test_messages = [
@@ -126,74 +126,67 @@ class LlamaStackBenchmark:
             [
                 {"role": "user", "content": "What is machine learning?"},
                 {"role": "assistant", "content": "Machine learning is a subset of AI..."},
-                {"role": "user", "content": "Can you give me a practical example?"}
-            ]
+                {"role": "user", "content": "Can you give me a practical example?"},
+            ],
         ]
 
-
-    async def make_async_streaming_request(self) -> Tuple[float, int, float | None, str | None]:
+    async def make_async_streaming_request(self) -> tuple[float, int, float | None, str | None]:
         """Make a single async streaming chat completion request."""
         messages = random.choice(self.test_messages)
-        payload = {
-            "model": self.model_id,
-            "messages": messages,
-            "stream": True,
-            "max_tokens": 100
-        }
-
+        payload = {"model": self.model_id, "messages": messages, "stream": True, "max_tokens": 100}
+
         start_time = time.time()
         chunks_received = 0
         ttft = None
         error = None
-
+
         session = aiohttp.ClientSession()
-
+
         try:
             async with session.post(
                 f"{self.base_url}/chat/completions",
                 headers=self.headers,
                 json=payload,
-                timeout=aiohttp.ClientTimeout(total=30)
+                timeout=aiohttp.ClientTimeout(total=30),
             ) as response:
                 if response.status == 200:
                     async for line in response.content:
                         if line:
-                            line_str = line.decode('utf-8').strip()
-                            if line_str.startswith('data: '):
+                            line_str = line.decode("utf-8").strip()
+                            if line_str.startswith("data: "):
                                 chunks_received += 1
                                 if ttft is None:
                                     ttft = time.time() - start_time
-                                if line_str == 'data: [DONE]':
+                                if line_str == "data: [DONE]":
                                     break
-
+
                     if chunks_received == 0:
                         error = "No streaming chunks received"
                 else:
                     text = await response.text()
                     error = f"HTTP {response.status}: {text[:100]}"
-
+
         except Exception as e:
             error = f"Request error: {str(e)}"
         finally:
             await session.close()
-
+
         response_time = time.time() - start_time
         return response_time, chunks_received, ttft, error
 
-
     async def run_benchmark(self, duration: int, concurrent_users: int) -> BenchmarkStats:
         """Run benchmark using async requests for specified duration."""
         stats = BenchmarkStats()
         stats.concurrent_users = concurrent_users
         stats.start_time = time.time()
-
+
         print(f"Starting benchmark: {duration}s duration, {concurrent_users} concurrent users")
         print(f"Target URL: {self.base_url}/chat/completions")
         print(f"Model: {self.model_id}")
-
+
         connector = aiohttp.TCPConnector(limit=concurrent_users)
-        async with aiohttp.ClientSession(connector=connector) as session:
-
+        async with aiohttp.ClientSession(connector=connector):
+
             async def worker(worker_id: int):
                 """Worker that sends requests sequentially until canceled."""
                 request_count = 0
@@ -202,12 +195,12 @@ class LlamaStackBenchmark:
                         response_time, chunks, ttft, error = await self.make_async_streaming_request()
                         await stats.add_result(response_time, chunks, ttft, error)
                         request_count += 1
-
+
                     except asyncio.CancelledError:
                         break
                     except Exception as e:
                         await stats.add_result(0, 0, None, f"Worker {worker_id} error: {str(e)}")
-
+
             # Progress reporting task
             async def progress_reporter():
                 last_report_time = time.time()
@@ -216,48 +209,52 @@ class LlamaStackBenchmark:
                         await asyncio.sleep(1)  # Report every second
                         if time.time() >= last_report_time + 10:  # Report every 10 seconds
                             elapsed = time.time() - stats.start_time
-                            print(f"Completed: {stats.total_requests} requests in {elapsed:.1f}s, RPS: {stats.total_requests / elapsed:.1f}")
+                            print(
+                                f"Completed: {stats.total_requests} requests in {elapsed:.1f}s, RPS: {stats.total_requests / elapsed:.1f}"
+                            )
                             last_report_time = time.time()
                 except asyncio.CancelledError:
                     break
-
+
             # Spawn concurrent workers
             tasks = [asyncio.create_task(worker(i)) for i in range(concurrent_users)]
             progress_task = asyncio.create_task(progress_reporter())
             tasks.append(progress_task)
-
+
             # Wait for duration then cancel all tasks
            await asyncio.sleep(duration)
-
+
             for task in tasks:
                 task.cancel()
-
+
             # Wait for all tasks to complete
             await asyncio.gather(*tasks, return_exceptions=True)
-
+
         stats.end_time = time.time()
         return stats
 
 
 def main():
     parser = argparse.ArgumentParser(description="Llama Stack Benchmark Tool")
-    parser.add_argument("--base-url", default=os.getenv("BENCHMARK_BASE_URL", "http://localhost:8000/v1/openai/v1"),
-                        help="Base URL for the API (default: http://localhost:8000/v1/openai/v1)")
-    parser.add_argument("--model", default=os.getenv("INFERENCE_MODEL", "test-model"),
-                        help="Model ID to use for requests")
-    parser.add_argument("--duration", type=int, default=60,
-                        help="Duration in seconds to run benchmark (default: 60)")
-    parser.add_argument("--concurrent", type=int, default=10,
-                        help="Number of concurrent users (default: 10)")
-
+    parser.add_argument(
+        "--base-url",
+        default=os.getenv("BENCHMARK_BASE_URL", "http://localhost:8000/v1/openai/v1"),
+        help="Base URL for the API (default: http://localhost:8000/v1/openai/v1)",
+    )
+    parser.add_argument(
+        "--model", default=os.getenv("INFERENCE_MODEL", "test-model"), help="Model ID to use for requests"
+    )
+    parser.add_argument("--duration", type=int, default=60, help="Duration in seconds to run benchmark (default: 60)")
+    parser.add_argument("--concurrent", type=int, default=10, help="Number of concurrent users (default: 10)")
+
     args = parser.parse_args()
-
+
     benchmark = LlamaStackBenchmark(args.base_url, args.model)
-
+
     try:
         stats = asyncio.run(benchmark.run_benchmark(args.duration, args.concurrent))
         stats.print_summary()
-
+
     except KeyboardInterrupt:
         print("\nBenchmark interrupted by user")
     except Exception as e:
diff --git a/docs/source/distributions/k8s-benchmark/openai-mock-server.py b/benchmarking/k8s-benchmark/openai-mock-server.py
similarity index 60%
rename from docs/source/distributions/k8s-benchmark/openai-mock-server.py
rename to benchmarking/k8s-benchmark/openai-mock-server.py
index de0680842..9e898af8e 100755
--- a/docs/source/distributions/k8s-benchmark/openai-mock-server.py
+++ b/benchmarking/k8s-benchmark/openai-mock-server.py
@@ -11,180 +11,192 @@ OpenAI-compatible mock server that returns:
 - Valid OpenAI-formatted chat completion responses with dynamic content
 """
 
-from flask import Flask, request, jsonify, Response
-import time
-import random
-import uuid
-import json
 import argparse
+import json
 import os
+import random
+import time
+import uuid
+
+from flask import Flask, Response, jsonify, request
 
 app = Flask(__name__)
 
+
 # Models from environment variables
 def get_models():
     models_str = os.getenv("MOCK_MODELS", "meta-llama/Llama-3.2-3B-Instruct")
     model_ids = [m.strip() for m in models_str.split(",") if m.strip()]
-
+
     return {
         "object": "list",
         "data": [
-            {
-                "id": model_id,
-                "object": "model",
-                "created": 1234567890,
-                "owned_by": "vllm"
-            }
-            for model_id in model_ids
-        ]
+            {"id": model_id, "object": "model", "created": 1234567890, "owned_by": "vllm"} for model_id in model_ids
+        ],
     }
 
+
 def generate_random_text(length=50):
     """Generate random but coherent text for responses."""
     words = [
-        "Hello", "there", "I'm", "an", "AI", "assistant", "ready", "to", "help", "you",
-        "with", "your", "questions", "and", "tasks", "today", "Let", "me","know", "what",
-        "you'd", "like", "to", "discuss", "or", "explore", "together", "I", "can", "assist",
-        "with", "various", "topics", "including", "coding", "writing", "analysis", "and", "more"
+        "Hello",
+        "there",
+        "I'm",
+        "an",
+        "AI",
+        "assistant",
+        "ready",
+        "to",
+        "help",
+        "you",
+        "with",
+        "your",
+        "questions",
+        "and",
+        "tasks",
+        "today",
+        "Let",
+        "me",
+        "know",
+        "what",
+        "you'd",
+        "like",
+        "to",
+        "discuss",
+        "or",
+        "explore",
+        "together",
+        "I",
+        "can",
+        "assist",
+        "with",
+        "various",
+        "topics",
+        "including",
+        "coding",
+        "writing",
+        "analysis",
+        "and",
+        "more",
     ]
     return " ".join(random.choices(words, k=length))
 
-@app.route('/v1/models', methods=['GET'])
+
+@app.route("/v1/models", methods=["GET"])
 def list_models():
     models = get_models()
     print(f"[MOCK] Returning models: {[m['id'] for m in models['data']]}")
     return jsonify(models)
 
-@app.route('/v1/chat/completions', methods=['POST'])
+
+@app.route("/v1/chat/completions", methods=["POST"])
 def chat_completions():
     """Return OpenAI-formatted chat completion responses."""
     data = request.get_json()
-    default_model = get_models()['data'][0]['id']
-    model = data.get('model', default_model)
-    messages = data.get('messages', [])
-    stream = data.get('stream', False)
-
+    default_model = get_models()["data"][0]["id"]
+    model = data.get("model", default_model)
+    messages = data.get("messages", [])
+    stream = data.get("stream", False)
+
     print(f"[MOCK] Chat completion request - model: {model}, stream: {stream}")
-
+
     if stream:
         return handle_streaming_completion(model, messages)
     else:
         return handle_non_streaming_completion(model, messages)
 
+
 def handle_non_streaming_completion(model, messages):
     response_text = generate_random_text(random.randint(20, 80))
-
+
     # Calculate realistic token counts
-    prompt_tokens = sum(len(str(msg.get('content', '')).split()) for msg in messages)
+    prompt_tokens = sum(len(str(msg.get("content", "")).split()) for msg in messages)
     completion_tokens = len(response_text.split())
-
+
     response = {
         "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
         "object": "chat.completion",
         "created": int(time.time()),
         "model": model,
-        "choices": [
-            {
-                "index": 0,
-                "message": {
-                    "role": "assistant",
-                    "content": response_text
-                },
-                "finish_reason": "stop"
-            }
-        ],
+        "choices": [{"index": 0, "message": {"role": "assistant", "content": response_text}, "finish_reason": "stop"}],
         "usage": {
             "prompt_tokens": prompt_tokens,
             "completion_tokens": completion_tokens,
-            "total_tokens": prompt_tokens + completion_tokens
-        }
+            "total_tokens": prompt_tokens + completion_tokens,
+        },
     }
-
+
     return jsonify(response)
 
+
 def handle_streaming_completion(model, messages):
     def generate_stream():
         # Generate response text
         full_response = generate_random_text(random.randint(30, 100))
         words = full_response.split()
-
+
         # Send initial chunk
         initial_chunk = {
             "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
             "object": "chat.completion.chunk",
             "created": int(time.time()),
             "model": model,
-            "choices": [
-                {
-                    "index": 0,
-                    "delta": {"role": "assistant", "content": ""}
-                }
-            ]
+            "choices": [{"index": 0, "delta": {"role": "assistant", "content": ""}}],
         }
         yield f"data: {json.dumps(initial_chunk)}\n\n"
-
+
         # Send word by word
         for i, word in enumerate(words):
             chunk = {
                 "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
-                "object": "chat.completion.chunk", 
+                "object": "chat.completion.chunk",
                 "created": int(time.time()),
                 "model": model,
-                "choices": [
-                    {
-                        "index": 0,
-                        "delta": {"content": f"{word} " if i < len(words) - 1 else word}
-                    }
-                ]
+                "choices": [{"index": 0, "delta": {"content": f"{word} " if i < len(words) - 1 else word}}],
             }
             yield f"data: {json.dumps(chunk)}\n\n"
             # Configurable delay to simulate realistic streaming
             stream_delay = float(os.getenv("STREAM_DELAY_SECONDS", "0.005"))
             time.sleep(stream_delay)
-
+
         # Send final chunk
         final_chunk = {
             "id": f"chatcmpl-{uuid.uuid4().hex[:8]}",
             "object": "chat.completion.chunk",
             "created": int(time.time()),
             "model": model,
-            "choices": [
-                {
-                    "index": 0,
-                    "delta": {"content": ""},
-                    "finish_reason": "stop"
-                }
-            ]
+            "choices": [{"index": 0, "delta": {"content": ""}, "finish_reason": "stop"}],
         }
         yield f"data: {json.dumps(final_chunk)}\n\n"
         yield "data: [DONE]\n\n"
-
+
     return Response(
         generate_stream(),
-        mimetype='text/event-stream',
+        mimetype="text/event-stream",
         headers={
-            'Cache-Control': 'no-cache',
-            'Connection': 'keep-alive',
-            'Access-Control-Allow-Origin': '*',
-        }
+            "Cache-Control": "no-cache",
+            "Connection": "keep-alive",
+            "Access-Control-Allow-Origin": "*",
+        },
     )
 
-@app.route('/health', methods=['GET'])
+
+@app.route("/health", methods=["GET"])
 def health():
     return jsonify({"status": "healthy", "type": "openai-mock"})
 
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='OpenAI-compatible mock server')
-    parser.add_argument('--port', type=int, default=8081,
-                        help='Port to run the server on (default: 8081)')
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="OpenAI-compatible mock server")
+    parser.add_argument("--port", type=int, default=8081, help="Port to run the server on (default: 8081)")
     args = parser.parse_args()
-
+
     port = args.port
-
+
     models = get_models()
     print("Starting OpenAI-compatible mock server...")
     print(f"- /models endpoint with: {[m['id'] for m in models['data']]}")
     print("- OpenAI-formatted chat/completion responses with dynamic content")
     print("- Streaming support with valid SSE format")
     print(f"- Listening on: http://0.0.0.0:{port}")
-    app.run(host='0.0.0.0', port=port, debug=False)
+    app.run(host="0.0.0.0", port=port, debug=False)
diff --git a/docs/source/distributions/k8s-benchmark/profile_running_server.sh b/benchmarking/k8s-benchmark/profile_running_server.sh
similarity index 100%
rename from docs/source/distributions/k8s-benchmark/profile_running_server.sh
rename to benchmarking/k8s-benchmark/profile_running_server.sh
diff --git a/docs/source/distributions/k8s-benchmark/run-benchmark.sh b/benchmarking/k8s-benchmark/run-benchmark.sh
similarity index 100%
rename from docs/source/distributions/k8s-benchmark/run-benchmark.sh
rename to benchmarking/k8s-benchmark/run-benchmark.sh
diff --git a/docs/source/distributions/k8s-benchmark/stack-configmap.yaml b/benchmarking/k8s-benchmark/stack-configmap.yaml
similarity index 100%
rename from docs/source/distributions/k8s-benchmark/stack-configmap.yaml
rename to benchmarking/k8s-benchmark/stack-configmap.yaml
diff --git a/docs/source/distributions/k8s-benchmark/stack-k8s.yaml.template b/benchmarking/k8s-benchmark/stack-k8s.yaml.template
similarity index 100%
rename from docs/source/distributions/k8s-benchmark/stack-k8s.yaml.template
rename to benchmarking/k8s-benchmark/stack-k8s.yaml.template
diff --git a/docs/source/distributions/k8s-benchmark/stack_run_config.yaml b/benchmarking/k8s-benchmark/stack_run_config.yaml
similarity index 100%
rename from docs/source/distributions/k8s-benchmark/stack_run_config.yaml
rename to benchmarking/k8s-benchmark/stack_run_config.yaml
diff --git a/docs/source/contributing/index.md b/docs/source/contributing/index.md
index 1846f4d97..71c3bd5a6 100644
--- a/docs/source/contributing/index.md
+++ b/docs/source/contributing/index.md
@@ -35,5 +35,5 @@ testing/record-replay
 
 ### Benchmarking
 
-```{include} ../../../docs/source/distributions/k8s-benchmark/README.md
+```{include} ../../../benchmarking/k8s-benchmark/README.md
 ```
diff --git a/llama_stack/ui/package-lock.json b/llama_stack/ui/package-lock.json
index 1db1c61cd..e2c0815fd 100644
--- a/llama_stack/ui/package-lock.json
+++ b/llama_stack/ui/package-lock.json
@@ -3578,6 +3578,13 @@
         "tailwindcss": "4.1.6"
       }
     },
+    "node_modules/@tailwindcss/node/node_modules/tailwindcss": {
+      "version": "4.1.6",
+      "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz",
+      "integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==",
+      "dev": true,
+      "license": "MIT"
+    },
     "node_modules/@tailwindcss/oxide": {
       "version": "4.1.6",
       "resolved": "https://registry.npmjs.org/@tailwindcss/oxide/-/oxide-4.1.6.tgz",
@@ -3838,6 +3845,13 @@
         "tailwindcss": "4.1.6"
       }
     },
+    "node_modules/@tailwindcss/postcss/node_modules/tailwindcss": {
+      "version": "4.1.6",
+      "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz",
+      "integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==",
+      "dev": true,
+      "license": "MIT"
+    },
     "node_modules/@testing-library/dom": {
       "version": "10.4.1",
       "resolved": "https://registry.npmjs.org/@testing-library/dom/-/dom-10.4.1.tgz",
@@ -13843,9 +13857,9 @@
       }
     },
     "node_modules/tailwindcss": {
-      "version": "4.1.6",
-      "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.6.tgz",
-      "integrity": "sha512-j0cGLTreM6u4OWzBeLBpycK0WIh8w7kSwcUsQZoGLHZ7xDTdM69lN64AgoIEEwFi0tnhs4wSykUa5YWxAzgFYg==",
+      "version": "4.1.13",
+      "resolved": "https://registry.npmjs.org/tailwindcss/-/tailwindcss-4.1.13.tgz",
+      "integrity": "sha512-i+zidfmTqtwquj4hMEwdjshYYgMbOrPzb9a0M3ZgNa0JMoZeFC6bxZvO8yr8ozS6ix2SDz0+mvryPeBs2TFE+w==",
       "dev": true,
       "license": "MIT"
     },
diff --git a/tests/integration/telemetry/test_openai_telemetry.py b/tests/integration/telemetry/test_openai_telemetry.py
index cdd9b6702..b3ffb6b09 100644
--- a/tests/integration/telemetry/test_openai_telemetry.py
+++ b/tests/integration/telemetry/test_openai_telemetry.py
@@ -49,16 +49,13 @@ def setup_openai_telemetry_data(llama_stack_client, text_model_id):
         traces = llama_stack_client.telemetry.query_traces(limit=10)
         if len(traces) >= 5:  # 5 OpenAI completion traces
             break
-        time.sleep(1)
+        time.sleep(0.1)
 
     if len(traces) < 5:
         pytest.fail(
             f"Failed to create sufficient OpenAI completion telemetry data after 30s. Got {len(traces)} traces."
         )
 
-    # Wait for 5 seconds to ensure traces has completed logging
-    time.sleep(5)
-
     yield
@@ -185,11 +182,13 @@ def test_openai_completion_creates_telemetry(llama_stack_client, text_model_id):
     assert len(response.choices) > 0, "Response should have at least one choice"
 
     # Wait for telemetry to be recorded
-    time.sleep(3)
-
-    # Check that we have more traces now
-    final_traces = llama_stack_client.telemetry.query_traces(limit=20)
-    final_count = len(final_traces)
+    start_time = time.time()
+    while time.time() - start_time < 30:
+        final_traces = llama_stack_client.telemetry.query_traces(limit=20)
+        final_count = len(final_traces)
+        if final_count > initial_count:
+            break
+        time.sleep(0.1)
 
     # Should have at least as many traces as before (might have more due to other activity)
     assert final_count >= initial_count, "Should have at least as many traces after OpenAI call"
diff --git a/tests/integration/telemetry/test_telemetry.py b/tests/integration/telemetry/test_telemetry.py
index d363edbc0..e86da954e 100644
--- a/tests/integration/telemetry/test_telemetry.py
+++ b/tests/integration/telemetry/test_telemetry.py
@@ -42,14 +42,11 @@ def setup_telemetry_data(llama_stack_client, text_model_id):
         traces = llama_stack_client.telemetry.query_traces(limit=10)
         if len(traces) >= 4:
             break
-        time.sleep(1)
+        time.sleep(0.1)
 
     if len(traces) < 4:
         pytest.fail(f"Failed to create sufficient telemetry data after 30s. Got {len(traces)} traces.")
 
-    # Wait for 5 seconds to ensure traces has completed logging
-    time.sleep(5)
-
     yield
diff --git a/tests/integration/telemetry/test_telemetry_metrics.py b/tests/integration/telemetry/test_telemetry_metrics.py
index 4ba2bd2d9..1d8312ae2 100644
--- a/tests/integration/telemetry/test_telemetry_metrics.py
+++ b/tests/integration/telemetry/test_telemetry_metrics.py
@@ -46,10 +46,7 @@ def setup_telemetry_metrics_data(openai_client, client_with_models, text_model_i
             break
         except Exception:
             pass
-        time.sleep(1)
-
-    # Wait additional time to ensure all metrics are processed
-    time.sleep(5)
+        time.sleep(0.1)
 
     # Return the token lists for use in tests
     return {"prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens, "total_tokens": total_tokens}
diff --git a/uv.lock b/uv.lock
index 2788c6fef..3d7713f54 100644
--- a/uv.lock
+++ b/uv.lock
@@ -2023,7 +2023,7 @@ wheels = [
 
 [[package]]
 name = "locust"
-version = "2.39.1"
+version = "2.40.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "configargparse" },
@@ -2035,6 +2035,7 @@ dependencies = [
     { name = "locust-cloud" },
     { name = "msgpack" },
     { name = "psutil" },
+    { name = "pytest" },
     { name = "python-engineio" },
     { name = "python-socketio", extra = ["client"] },
     { name = "pywin32", marker = "sys_platform == 'win32'" },
@@ -2043,9 +2044,9 @@ dependencies = [
     { name = "setuptools" },
     { name = "werkzeug" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/95/c8/10aa5445c404eed389b56877e6714c1787190cc09dd70059ce3765979ec5/locust-2.39.1.tar.gz", hash = "sha256:6bdd19e27edf9a1c84391d6cf6e9a737dfb832be7dfbf39053191ae31b9cc498", size = 1409902, upload-time = "2025-08-29T17:41:01.544Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/01/22/82f40176473a98c9479bed667d3ad21bb859d2cb67f6880a6b0b6a725e45/locust-2.40.1.tar.gz", hash = "sha256:5bde76c1cf7e412071670f926f34844e119210c93f07a4cf9fc4cb93c60a578a", size = 1411606, upload-time = "2025-09-05T15:57:35.76Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/ec/b3/b2f4b2ca88b1e72eba7be2b2982533b887f8b709d222db78eb9602aa5121/locust-2.39.1-py3-none-any.whl", hash = "sha256:fd5148f2f1a4ed34aee968abc4393674e69d1b5e1b54db50a397f6eb09ce0b04", size = 1428155, upload-time = "2025-08-29T17:41:00.245Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/e6/9c6335ab16becf4f8ad3da6083ab78793c56ec1ca496d6f7c74660c21c3f/locust-2.40.1-py3-none-any.whl", hash = "sha256:ef0517f9bb5ed0afa7035014faaf944802917e07da8649461aaaf5e5f3ba8a65", size = 1430154, upload-time = "2025-09-05T15:57:33.233Z" },
 ]
 
 [[package]]
@@ -2619,7 +2620,7 @@ wheels = [
 
 [[package]]
 name = "openai"
-version = "1.102.0"
+version = "1.107.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
@@ -2631,9 +2632,9 @@ dependencies = [
     { name = "tqdm" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/07/55/da5598ed5c6bdd9939633854049cddc5cbac0da938dfcfcb3c6b119c16c0/openai-1.102.0.tar.gz", hash = "sha256:2e0153bcd64a6523071e90211cbfca1f2bbc5ceedd0993ba932a5869f93b7fc9", size = 519027, upload-time = "2025-08-26T20:50:29.397Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/88/67/d6498de300f83ff57a79cb7aa96ef3bef8d6f070c3ded0f1b5b45442a6bc/openai-1.107.0.tar.gz", hash = "sha256:43e04927584e57d0e9e640ee0077c78baf8150098be96ebd5c512539b6c4e9a4", size = 566056, upload-time = "2025-09-08T19:25:47.604Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/bd/0d/c9e7016d82c53c5b5e23e2bad36daebb8921ed44f69c0a985c6529a35106/openai-1.102.0-py3-none-any.whl", hash = "sha256:d751a7e95e222b5325306362ad02a7aa96e1fab3ed05b5888ce1c7ca63451345", size = 812015, upload-time = "2025-08-26T20:50:27.219Z" },
+    { url = "https://files.pythonhosted.org/packages/91/ed/e8a4fd20390f2858b95227c288df8fe0c835f7c77625f7583609161684ba/openai-1.107.0-py3-none-any.whl", hash = "sha256:3dcfa3cbb116bd6924b27913b8da28c4a787379ff60049588547a1013e6d6438", size = 950968, upload-time = "2025-09-08T19:25:45.552Z" },
 ]
 
 [[package]]
@@ -3540,7 +3541,7 @@ wheels = [
 
 [[package]]
 name = "pytest"
-version = "8.4.1"
+version = "8.4.2"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "colorama", marker = "sys_platform == 'win32'" },
     { name = "iniconfig" },
     { name = "packaging" },
     { name = "pluggy" },
     { name = "pygments" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/08/ba/45911d754e8eba3d5a841a5ce61a65a685ff1798421ac054f85aa8747dfb/pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c", size = 1517714, upload-time = "2025-06-18T05:48:06.109Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474, upload-time = "2025-06-18T05:48:03.955Z" },
+    { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" },
 ]
 
 [[package]]