feat(cherry-pick): fixes for 0.3.1 release (#3998)

## Summary

Cherry-picks 5 critical fixes from main to the release-0.3.x branch for the v0.3.1 release, plus CI workflow updates.

**Note**: This recreates the cherry-picks from the closed PR #3991, now targeting the renamed `release-0.3.x` branch (previously `release-0.3.x-maint`).

## Commits

1. **2c56a8560** - fix(context): prevent provider data leak between streaming requests (#3924)
   - **CRITICAL SECURITY FIX**: Prevents provider credentials from leaking between requests
   - Fixed import path for 0.3.0 compatibility
2. **ddd32b187** - fix(inference): enable routing of models with provider_data alone (#3928)
   - Enables routing for fully qualified model IDs with provider_data
   - Resolved merge conflicts, adapted for 0.3.0 structure
3. **f7c2973aa** - fix: Avoid BadRequestError due to invalid max_tokens (#3667)
   - Fixes failures with Gemini and other providers that reject max_tokens=0
   - Non-breaking API change
4. **d7f9da616** - fix(responses): sync conversation before yielding terminal events in streaming (#3888)
   - Ensures conversation sync executes even when streaming consumers break early
5. **0ffa8658b** - fix(logging): ensure logs go to stderr, loggers obey levels (#3885)
   - Fixes logging infrastructure
6. **75b49cb3c** - ci: support release branches and match client branch (#3990)
   - Updates CI workflows to support release-X.Y.x branches
   - Matches client branch from llama-stack-client-python for release testing
   - Fixes artifact name collisions

## Adaptations for 0.3.0

- Fixed import paths: `llama_stack.core.telemetry.tracing` → `llama_stack.providers.utils.telemetry.tracing`
- Fixed import paths: `llama_stack.core.telemetry.telemetry` → `llama_stack.apis.telemetry`
- Changed `self.telemetry_enabled` → `self.telemetry` (0.3.0 attribute name)
- Removed `rerank()` method that doesn't exist in 0.3.0

## Testing

All imports have been verified, and tests should pass once CI is set up.
2025-12-03 09:53:45 +00:00 · 2025-10-30 21:51:42 -07:00 · 2025-10-30 21:51:42 -07:00 · 39f33f7f12
commit 39f33f7f12
parent bf091306fe
11 changed files with 182 additions and 65 deletions
--- a/llama_stack/core/utils/context.py
+++ b/llama_stack/core/utils/context.py
@@ -7,6 +7,10 @@
 from collections.abc import AsyncGenerator
 from contextvars import ContextVar

+from llama_stack.providers.utils.telemetry.tracing import CURRENT_TRACE_CONTEXT
+
+_MISSING = object()
+

 def preserve_contexts_async_generator[T](
    gen: AsyncGenerator[T, None], context_vars: list[ContextVar]
@@ -21,20 +25,60 @@ def preserve_contexts_async_generator[T](

    async def wrapper() -> AsyncGenerator[T, None]:
        while True:
+            previous_values: dict[ContextVar, object] = {}
+            tokens: dict[ContextVar, object] = {}
+
+            # Restore ALL context values before any await and capture previous state
+            # This is needed to propagate context across async generator boundaries
+            for context_var in context_vars:
+                try:
+                    previous_values[context_var] = context_var.get()
+                except LookupError:
+                    previous_values[context_var] = _MISSING
+                tokens[context_var] = context_var.set(initial_context_values[context_var.name])
+
+            def _restore_context_var(context_var: ContextVar, *, _tokens=tokens, _prev=previous_values) -> None:
+                token = _tokens.get(context_var)
+                previous_value = _prev.get(context_var, _MISSING)
+                if token is not None:
+                    try:
+                        context_var.reset(token)
+                        return
+                    except (RuntimeError, ValueError):
+                        pass
+
+                if previous_value is _MISSING:
+                    context_var.set(None)
+                else:
+                    context_var.set(previous_value)
+
            try:
-                # Restore context values before any await
-                for context_var in context_vars:
-                    context_var.set(initial_context_values[context_var.name])
-
                item = await gen.__anext__()
-
-                # Update our tracked values with any changes made during this iteration
-                for context_var in context_vars:
-                    initial_context_values[context_var.name] = context_var.get()
-
-                yield item
-
            except StopAsyncIteration:
+                # Restore all context vars before exiting to prevent leaks
+                # Use _restore_context_var for all vars to properly restore to previous values
+                for context_var in context_vars:
+                    _restore_context_var(context_var)
                break
+            except Exception:
+                # Restore all context vars on exception
+                for context_var in context_vars:
+                    _restore_context_var(context_var)
+                raise
+
+            try:
+                yield item
+                # Update our tracked values with any changes made during this iteration
+                # Only for non-trace context vars - trace context must persist across yields
+                # to allow nested span tracking for telemetry
+                for context_var in context_vars:
+                    if context_var is not CURRENT_TRACE_CONTEXT:
+                        initial_context_values[context_var.name] = context_var.get()
+            finally:
+                # Restore non-trace context vars after each yield to prevent leaks between requests
+                # CURRENT_TRACE_CONTEXT is NOT restored here to preserve telemetry span stack
+                for context_var in context_vars:
+                    if context_var is not CURRENT_TRACE_CONTEXT:
+                        _restore_context_var(context_var)

    return wrapper()
--- a/llama_stack/ui/package-lock.json
+++ b/llama_stack/ui/package-lock.json
@@ -18,7 +18,7 @@
        "class-variance-authority": "^0.7.1",
        "clsx": "^2.1.1",
        "framer-motion": "^12.23.24",
-        "llama-stack-client": "^0.2.23",
+        "llama-stack-client": "^0.3.0",
        "lucide-react": "^0.545.0",
        "next": "15.5.4",
        "next-auth": "^4.24.11",
@@ -75,20 +75,6 @@
        "url": "https://github.com/sponsors/sindresorhus"
      }
    },
-    "node_modules/@ampproject/remapping": {
-      "version": "2.3.0",
-      "resolved": "https://registry.npmjs.org/@ampproject/remapping/-/remapping-2.3.0.tgz",
-      "integrity": "sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==",
-      "dev": true,
-      "license": "Apache-2.0",
-      "dependencies": {
-        "@jridgewell/gen-mapping": "^0.3.5",
-        "@jridgewell/trace-mapping": "^0.3.24"
-      },
-      "engines": {
-        "node": ">=6.0.0"
-      }
-    },
    "node_modules/@asamuzakjp/css-color": {
      "version": "3.2.0",
      "resolved": "https://registry.npmjs.org/@asamuzakjp/css-color/-/css-color-3.2.0.tgz",
@@ -3647,17 +3633,6 @@
      "dev": true,
      "license": "MIT"
    },
-    "node_modules/@tybys/wasm-util": {
-      "version": "0.9.0",
-      "resolved": "https://registry.npmjs.org/@tybys/wasm-util/-/wasm-util-0.9.0.tgz",
-      "integrity": "sha512-6+7nlbMVX/PVDCwaIQ8nTOPveOcFLSt8GcXdx8hD0bt39uWxYT88uXzqTd4fTvqta7oeUJqudepapKNt2DYJFw==",
-      "dev": true,
-      "license": "MIT",
-      "optional": true,
-      "dependencies": {
-        "tslib": "^2.4.0"
-      }
-    },
    "node_modules/@types/aria-query": {
      "version": "5.0.4",
      "resolved": "https://registry.npmjs.org/@types/aria-query/-/aria-query-5.0.4.tgz",
@@ -9660,9 +9635,9 @@
      "license": "MIT"
    },
    "node_modules/llama-stack-client": {
-      "version": "0.2.23",
-      "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.23.tgz",
-      "integrity": "sha512-J3YFH1HW2K70capejQxGlCyTgKdfx+sQf8Ab+HFi1j2Q00KtpHXB79RxejvBxjWC3X2E++P9iU57KdU2Tp/rIQ==",
+      "version": "0.3.0",
+      "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.3.0.tgz",
+      "integrity": "sha512-76K/t1doaGmlBbDxCADaral9Vccvys9P8pqAMIhwBhMAqWudCEORrMMhUSg+pjhamWmEKj3wa++d4zeOGbfN/w==",
      "license": "MIT",
      "dependencies": {
        "@types/node": "^18.11.18",