diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml
index 7822e4216..cf10e005c 100644
--- a/.github/workflows/integration-auth-tests.yml
+++ b/.github/workflows/integration-auth-tests.yml
@@ -35,7 +35,7 @@ jobs:
 
       - name: Install minikube
         if: ${{ matrix.auth-provider == 'kubernetes' }}
-        uses: medyagh/setup-minikube@cea33675329b799adccc9526aa5daccc26cd5052 # v0.0.19
+        uses: medyagh/setup-minikube@e3c7f79eb1e997eabccc536a6cf318a2b0fe19d9 # v0.0.20
 
       - name: Start minikube
         if: ${{ matrix.auth-provider == 'oauth2_token' }}
diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh
index d9a918fb5..6e794b36f 100755
--- a/llama_stack/distribution/build_container.sh
+++ b/llama_stack/distribution/build_container.sh
@@ -96,7 +96,7 @@ FROM $container_base
 WORKDIR /app
 
 # We install the Python 3.12 dev headers and build tools so that any
-# C‑extension wheels (e.g. polyleven, faiss‑cpu) can compile successfully.
+# C-extension wheels (e.g. polyleven, faiss-cpu) can compile successfully.
 
 RUN dnf -y update && dnf install -y iputils git net-tools wget \
     vim-minimal python3.12 python3.12-pip python3.12-wheel \
@@ -169,7 +169,7 @@ if [ -n "$run_config" ]; then
     echo "Copying external providers directory: $external_providers_dir"
     cp -r "$external_providers_dir" "$BUILD_CONTEXT_DIR/providers.d"
     add_to_container << EOF
-COPY --chmod=g+w providers.d /.llama/providers.d
+COPY providers.d /.llama/providers.d
 EOF
     fi
 
diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py
index 4b295e788..91c6b6c17 100644
--- a/llama_stack/providers/remote/inference/groq/groq.py
+++ b/llama_stack/providers/remote/inference/groq/groq.py
@@ -38,24 +38,18 @@ class GroqInferenceAdapter(LiteLLMOpenAIMixin):
             provider_data_api_key_field="groq_api_key",
         )
         self.config = config
-        self._openai_client = None
 
     async def initialize(self):
         await super().initialize()
 
     async def shutdown(self):
         await super().shutdown()
-        if self._openai_client:
-            await self._openai_client.close()
-            self._openai_client = None
 
     def _get_openai_client(self) -> AsyncOpenAI:
-        if not self._openai_client:
-            self._openai_client = AsyncOpenAI(
-                base_url=f"{self.config.url}/openai/v1",
-                api_key=self.config.api_key,
-            )
-        return self._openai_client
+        return AsyncOpenAI(
+            base_url=f"{self.config.url}/openai/v1",
+            api_key=self.get_api_key(),
+        )
 
     async def openai_chat_completion(
         self,
diff --git a/llama_stack/providers/remote/inference/openai/openai.py b/llama_stack/providers/remote/inference/openai/openai.py
index 72428422f..818883919 100644
--- a/llama_stack/providers/remote/inference/openai/openai.py
+++ b/llama_stack/providers/remote/inference/openai/openai.py
@@ -59,9 +59,6 @@ class OpenAIInferenceAdapter(LiteLLMOpenAIMixin):
         # if we do not set this, users will be exposed to the
         # litellm specific model names, an abstraction leak.
         self.is_openai_compat = True
-        self._openai_client = AsyncOpenAI(
-            api_key=self.config.api_key,
-        )
 
     async def initialize(self) -> None:
         await super().initialize()
@@ -69,6 +66,11 @@ class OpenAIInferenceAdapter(LiteLLMOpenAIMixin):
     async def shutdown(self) -> None:
         await super().shutdown()
 
+    def _get_openai_client(self) -> AsyncOpenAI:
+        return AsyncOpenAI(
+            api_key=self.get_api_key(),
+        )
+
     async def openai_completion(
         self,
         model: str,
@@ -120,7 +122,7 @@ class OpenAIInferenceAdapter(LiteLLMOpenAIMixin):
             user=user,
             suffix=suffix,
         )
-        return await self._openai_client.completions.create(**params)
+        return await self._get_openai_client().completions.create(**params)
 
     async def openai_chat_completion(
         self,
@@ -176,7 +178,7 @@ class OpenAIInferenceAdapter(LiteLLMOpenAIMixin):
             top_p=top_p,
             user=user,
         )
-        return await self._openai_client.chat.completions.create(**params)
+        return await self._get_openai_client().chat.completions.create(**params)
 
     async def openai_embeddings(
         self,
@@ -204,7 +206,7 @@ class OpenAIInferenceAdapter(LiteLLMOpenAIMixin):
             params["user"] = user
 
         # Call OpenAI embeddings API
-        response = await self._openai_client.embeddings.create(**params)
+        response = await self._get_openai_client().embeddings.create(**params)
 
         data = []
         for i, embedding_data in enumerate(response.data):
diff --git a/llama_stack/providers/remote/inference/sambanova/sambanova.py b/llama_stack/providers/remote/inference/sambanova/sambanova.py
index 20f863665..9c2dda889 100644
--- a/llama_stack/providers/remote/inference/sambanova/sambanova.py
+++ b/llama_stack/providers/remote/inference/sambanova/sambanova.py
@@ -7,6 +7,7 @@
 import json
 from collections.abc import Iterable
 
+import requests
 from openai.types.chat import (
     ChatCompletionAssistantMessageParam as OpenAIChatCompletionAssistantMessage,
 )
@@ -56,6 +57,7 @@ from llama_stack.apis.inference import (
     ToolResponseMessage,
     UserMessage,
 )
+from llama_stack.apis.models import Model
 from llama_stack.log import get_logger
 from llama_stack.models.llama.datatypes import BuiltinTool
 from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin
@@ -176,10 +178,11 @@ class SambaNovaInferenceAdapter(LiteLLMOpenAIMixin):
 
     def __init__(self, config: SambaNovaImplConfig):
         self.config = config
+        self.environment_available_models = []
         LiteLLMOpenAIMixin.__init__(
             self,
             model_entries=MODEL_ENTRIES,
-            api_key_from_config=self.config.api_key,
+            api_key_from_config=self.config.api_key.get_secret_value() if self.config.api_key else None,
             provider_data_api_key_field="sambanova_api_key",
         )
 
@@ -246,6 +249,36 @@ class SambaNovaInferenceAdapter(LiteLLMOpenAIMixin):
             **get_sampling_options(request.sampling_params),
         }
 
+    async def register_model(self, model: Model) -> Model:
+        """Accept ``model`` and warn when the SambaNova endpoint does not list it.
+
+        The ``/models`` listing under ``self.config.url`` is fetched on first use and
+        kept in ``self.environment_available_models``; it is fetched again only while
+        that cache is still empty.
+
+        :param model: the model being registered
+        :returns: ``model`` unchanged; a missing model is logged, not rejected
+        :raises RuntimeError: if the request for the model listing fails
+        """
+        # NOTE(review): get_provider_model_id is defined outside this hunk and may return
+        # None for an unknown alias -- fall back to the raw id so .split() below cannot fail.
+        model_id = self.get_provider_model_id(model.provider_resource_id) or model.provider_resource_id
+
+        list_models_url = self.config.url + "/models"
+        if len(self.environment_available_models) == 0:
+            try:
+                # requests never times out by default; bound the wait so an unresponsive
+                # endpoint cannot stall registration forever.
+                response = requests.get(list_models_url, timeout=30)
+                response.raise_for_status()
+            except requests.exceptions.RequestException as e:
+                raise RuntimeError(f"Request to {list_models_url} failed") from e
+            self.environment_available_models = [entry.get("id") for entry in response.json().get("data", {})]
+
+        if model_id.split("sambanova/")[-1] not in self.environment_available_models:
+            logger.warning(f"Model {model_id} not available in {list_models_url}")
+        return model
+
     async def initialize(self):
         await super().initialize()
 
diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py
index 9e6877b7c..e1eb934c5 100644
--- a/llama_stack/providers/remote/inference/together/together.py
+++ b/llama_stack/providers/remote/inference/together/together.py
@@ -68,19 +68,12 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi
     def __init__(self, config: TogetherImplConfig) -> None:
         ModelRegistryHelper.__init__(self, MODEL_ENTRIES)
         self.config = config
-        self._client = None
-        self._openai_client = None
 
     async def initialize(self) -> None:
         pass
 
     async def shutdown(self) -> None:
-        if self._client:
-            # Together client has no close method, so just set to None
-            self._client = None
-        if self._openai_client:
-            await self._openai_client.close()
-            self._openai_client = None
+        pass
 
     async def completion(
         self,
@@ -108,29 +101,25 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi
             return await self._nonstream_completion(request)
 
     def _get_client(self) -> AsyncTogether:
-        if not self._client:
-            together_api_key = None
-            config_api_key = self.config.api_key.get_secret_value() if self.config.api_key else None
-            if config_api_key:
-                together_api_key = config_api_key
-            else:
-                provider_data = self.get_request_provider_data()
-                if provider_data is None or not provider_data.together_api_key:
-                    raise ValueError(
-                        'Pass Together API Key in the header X-LlamaStack-Provider-Data as { "together_api_key": <your api key>}'
-                    )
-                together_api_key = provider_data.together_api_key
-            self._client = AsyncTogether(api_key=together_api_key)
-        return self._client
+        together_api_key = None
+        config_api_key = self.config.api_key.get_secret_value() if self.config.api_key else None
+        if config_api_key:
+            together_api_key = config_api_key
+        else:
+            provider_data = self.get_request_provider_data()
+            if provider_data is None or not provider_data.together_api_key:
+                raise ValueError(
+                    'Pass Together API Key in the header X-LlamaStack-Provider-Data as { "together_api_key": <your api key>}'
+                )
+            together_api_key = provider_data.together_api_key
+        return AsyncTogether(api_key=together_api_key)
 
     def _get_openai_client(self) -> AsyncOpenAI:
-        if not self._openai_client:
-            together_client = self._get_client().client
-            self._openai_client = AsyncOpenAI(
-                base_url=together_client.base_url,
-                api_key=together_client.api_key,
-            )
-        return self._openai_client
+        together_client = self._get_client().client
+        return AsyncOpenAI(
+            base_url=together_client.base_url,
+            api_key=together_client.api_key,
+        )
 
     async def _nonstream_completion(self, request: CompletionRequest) -> ChatCompletionResponse:
         params = await self._get_params(request)
diff --git a/llama_stack/providers/remote/safety/sambanova/sambanova.py b/llama_stack/providers/remote/safety/sambanova/sambanova.py
index 84c8267ae..1a65f6aa1 100644
--- a/llama_stack/providers/remote/safety/sambanova/sambanova.py
+++ b/llama_stack/providers/remote/safety/sambanova/sambanova.py
@@ -33,6 +33,7 @@ CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?"
 class SambaNovaSafetyAdapter(Safety, ShieldsProtocolPrivate, NeedsRequestProviderData):
     def __init__(self, config: SambaNovaSafetyConfig) -> None:
         self.config = config
+        self.environment_available_models = []
 
     async def initialize(self) -> None:
         pass
@@ -54,18 +55,27 @@ class SambaNovaSafetyAdapter(Safety, ShieldsProtocolPrivate, NeedsRequestProvide
 
     async def register_shield(self, shield: Shield) -> None:
+        """Log a warning when ``shield`` lacks "guard" in its id or is not listed by SambaNova.
+
+        The ``/models`` listing under ``self.config.url`` is fetched while
+        ``self.environment_available_models`` is empty and reused afterwards.
+
+        :raises RuntimeError: if the request for the model listing fails
+        """
         list_models_url = self.config.url + "/models"
-        try:
-            response = requests.get(list_models_url)
-            response.raise_for_status()
-        except requests.exceptions.RequestException as e:
-            raise RuntimeError(f"Request to {list_models_url} failed") from e
-        available_models = [model.get("id") for model in response.json().get("data", {})]
+        if len(self.environment_available_models) == 0:
+            try:
+                # requests never times out by default; bound the wait so an unresponsive
+                # endpoint cannot stall shield registration forever.
+                response = requests.get(list_models_url, timeout=30)
+                response.raise_for_status()
+            except requests.exceptions.RequestException as e:
+                raise RuntimeError(f"Request to {list_models_url} failed") from e
+            self.environment_available_models = [model.get("id") for model in response.json().get("data", {})]
         if (
-            len(available_models) == 0
-            or "guard" not in shield.provider_resource_id.lower()
-            or shield.provider_resource_id.split("sambanova/")[-1] not in available_models
+            "guard" not in shield.provider_resource_id.lower()
+            or shield.provider_resource_id.split("sambanova/")[-1] not in self.environment_available_models
         ):
-            raise ValueError(f"Shield {shield.provider_resource_id} not found in SambaNova")
+            logger.warning(f"Shield {shield.provider_resource_id} not available in {list_models_url}")
 
     async def run_shield(
         self, shield_id: str, messages: list[Message], params: dict[str, Any] | None = None
diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/page.tsx b/llama_stack/ui/app/logs/vector-stores/[id]/page.tsx
new file mode 100644
index 000000000..f27c9d802
--- /dev/null
+++ b/llama_stack/ui/app/logs/vector-stores/[id]/page.tsx
@@ -0,0 +1,91 @@
+"use client";
+
+import { useEffect, useState } from "react";
+import { useParams, useRouter } from "next/navigation";
+import { useAuthClient } from "@/hooks/use-auth-client";
+import type { VectorStore } from "llama-stack-client/resources/vector-stores/vector-stores";
+import type { VectorStoreFile } from "llama-stack-client/resources/vector-stores/files";
+import { VectorStoreDetailView } from "@/components/vector-stores/vector-store-detail";
+
+/**
+ * Detail page for one vector store.
+ *
+ * Reads the store id from the route params, loads the store and its file
+ * list in two independent effects, and passes the results together with
+ * per-request loading and error state to VectorStoreDetailView.
+ */
+export default function VectorStoreDetailPage() {
+  const params = useParams();
+  const id = params.id as string;
+  const client = useAuthClient();
+  const router = useRouter();
+
+  const [store, setStore] = useState<VectorStore | null>(null);
+  const [files, setFiles] = useState<VectorStoreFile[]>([]);
+  const [isLoadingStore, setIsLoadingStore] = useState(true);
+  const [isLoadingFiles, setIsLoadingFiles] = useState(true);
+  const [errorStore, setErrorStore] = useState<Error | null>(null);
+  const [errorFiles, setErrorFiles] = useState<Error | null>(null);
+
+  useEffect(() => {
+    if (!id) {
+      setErrorStore(new Error("Vector Store ID is missing."));
+      setIsLoadingStore(false);
+      return;
+    }
+    const fetchStore = async () => {
+      setIsLoadingStore(true);
+      setErrorStore(null);
+      try {
+        const response = await client.vectorStores.retrieve(id);
+        setStore(response as VectorStore);
+      } catch (err) {
+        setErrorStore(
+          err instanceof Error
+            ? err
+            : new Error("Failed to load vector store."),
+        );
+      } finally {
+        setIsLoadingStore(false);
+      }
+    };
+    fetchStore();
+  }, [id, client]);
+
+  useEffect(() => {
+    if (!id) {
+      setErrorFiles(new Error("Vector Store ID is missing."));
+      setIsLoadingFiles(false);
+      return;
+    }
+    const fetchFiles = async () => {
+      setIsLoadingFiles(true);
+      setErrorFiles(null);
+      try {
+        const result = await client.vectorStores.files.list(id as any);
+        setFiles((result as any).data);
+      } catch (err) {
+        setErrorFiles(
+          err instanceof Error ? err : new Error("Failed to load files."),
+        );
+      } finally {
+        setIsLoadingFiles(false);
+      }
+    };
+    fetchFiles();
+    // `client` is read inside this effect, so it is listed as a dependency
+    // (as in the store effect above); otherwise a stale client could be used.
+  }, [id, client]);
+
+  return (
+    <VectorStoreDetailView
+      store={store}
+      files={files}
+      isLoadingStore={isLoadingStore}
+      isLoadingFiles={isLoadingFiles}
+      errorStore={errorStore}
+      errorFiles={errorFiles}
+      id={id}
+    />
+  );
+}
diff --git a/llama_stack/ui/app/logs/vector-stores/layout.tsx b/llama_stack/ui/app/logs/vector-stores/layout.tsx
new file mode 100644
index 000000000..9245f5486
--- /dev/null
+++ b/llama_stack/ui/app/logs/vector-stores/layout.tsx
@@ -0,0 +1,16 @@
+"use client";
+
+import React from "react";
+import LogsLayout from "@/components/layout/logs-layout";
+
+export default function VectorStoresLayout({
+  children,
+}: {
+  children: React.ReactNode;
+}) {
+  return (
+    <LogsLayout sectionLabel="Vector Stores" basePath="/logs/vector-stores">
+      {children}
+    </LogsLayout>
+  );
+}
diff --git a/llama_stack/ui/app/logs/vector-stores/page.tsx b/llama_stack/ui/app/logs/vector-stores/page.tsx
new file mode 100644
index 000000000..29e1fabd6
--- /dev/null
+++ b/llama_stack/ui/app/logs/vector-stores/page.tsx
@@ -0,0 +1,121 @@
+"use client";
+
+import React from "react";
+import { useAuthClient } from "@/hooks/use-auth-client";
+import type {
+  ListVectorStoresResponse,
+  VectorStore,
+} from "llama-stack-client/resources/vector-stores/vector-stores";
+import { useRouter } from "next/navigation";
+import { usePagination } from "@/hooks/use-pagination";
+import {
+  Table,
+  TableBody,
+  TableCaption,
+  TableCell,
+  TableHead,
+  TableHeader,
+  TableRow,
+} from "@/components/ui/table";
+import { Skeleton } from "@/components/ui/skeleton";
+
+export default function VectorStoresPage() {
+  const client = useAuthClient();
+  const router = useRouter();
+  const {
+    data: stores,
+    status,
+    hasMore,
+    error,
+    loadMore,
+  } = usePagination<VectorStore>({
+    limit: 20,
+    order: "desc",
+    fetchFunction: async (client, params) => {
+      const response = await client.vectorStores.list({
+        after: params.after,
+        limit: params.limit,
+        order: params.order,
+      } as any);
+      return response as ListVectorStoresResponse;
+    },
+    errorMessagePrefix: "vector stores",
+  });
+
+  // Keep calling loadMore until every page is loaded; there is no scroll trigger (like Responses)
+  React.useEffect(() => {
+    if (status === "idle" && hasMore) {
+      loadMore();
+    }
+  }, [status, hasMore, loadMore]);
+
+  if (status === "loading") {
+    return (
+      <div className="space-y-2">
+        <Skeleton className="h-8 w-full" />
+        <Skeleton className="h-4 w-full" />
+        <Skeleton className="h-4 w-full" />
+      </div>
+    );
+  }
+
+  if (status === "error") {
+    return <div className="text-destructive">Error: {error?.message}</div>;
+  }
+
+  if (!stores || stores.length === 0) {
+    return <p>No vector stores found.</p>;
+  }
+
+  return (
+    <div className="overflow-auto flex-1 min-h-0">
+      <Table>
+        <TableHeader>
+          <TableRow>
+            <TableHead>ID</TableHead>
+            <TableHead>Name</TableHead>
+            <TableHead>Created</TableHead>
+            <TableHead>Completed</TableHead>
+            <TableHead>Cancelled</TableHead>
+            <TableHead>Failed</TableHead>
+            <TableHead>In Progress</TableHead>
+            <TableHead>Total</TableHead>
+            <TableHead>Usage Bytes</TableHead>
+            <TableHead>Provider ID</TableHead>
+            <TableHead>Provider Vector DB ID</TableHead>
+          </TableRow>
+        </TableHeader>
+        <TableBody>
+          {stores.map((store) => {
+            const fileCounts = store.file_counts;
+            const metadata = store.metadata || {};
+            const providerId = metadata.provider_id ?? "";
+            const providerDbId = metadata.provider_vector_db_id ?? "";
+
+            return (
+              <TableRow
+                key={store.id}
+                onClick={() => router.push(`/logs/vector-stores/${store.id}`)}
+                className="cursor-pointer hover:bg-muted/50"
+              >
+                <TableCell>{store.id}</TableCell>
+                <TableCell>{store.name}</TableCell>
+                <TableCell>
+                  {new Date(store.created_at * 1000).toLocaleString()}
+                </TableCell>
+                <TableCell>{fileCounts.completed}</TableCell>
+                <TableCell>{fileCounts.cancelled}</TableCell>
+                <TableCell>{fileCounts.failed}</TableCell>
+                <TableCell>{fileCounts.in_progress}</TableCell>
+                <TableCell>{fileCounts.total}</TableCell>
+                <TableCell>{store.usage_bytes}</TableCell>
+                <TableCell>{providerId}</TableCell>
+                <TableCell>{providerDbId}</TableCell>
+              </TableRow>
+            );
+          })}
+        </TableBody>
+      </Table>
+    </div>
+  );
+}
diff --git a/llama_stack/ui/components/layout/app-sidebar.tsx b/llama_stack/ui/components/layout/app-sidebar.tsx
index 1c53d6cc5..532e43dbd 100644
--- a/llama_stack/ui/components/layout/app-sidebar.tsx
+++ b/llama_stack/ui/components/layout/app-sidebar.tsx
@@ -1,6 +1,11 @@
 "use client";
 
-import { MessageSquareText, MessagesSquare, MoveUpRight } from "lucide-react";
+import {
+  MessageSquareText,
+  MessagesSquare,
+  MoveUpRight,
+  Database,
+} from "lucide-react";
 import Link from "next/link";
 import { usePathname } from "next/navigation";
 import { cn } from "@/lib/utils";
@@ -28,6 +33,11 @@ const logItems = [
     url: "/logs/responses",
     icon: MessagesSquare,
   },
+  {
+    title: "Vector Stores",
+    url: "/logs/vector-stores",
+    icon: Database,
+  },
   {
     title: "Documentation",
     url: "https://llama-stack.readthedocs.io/en/latest/references/api_reference/index.html",
@@ -57,13 +67,13 @@ export function AppSidebar() {
                       className={cn(
                         "justify-start",
                         isActive &&
-                          "bg-gray-200 hover:bg-gray-200 text-primary hover:text-primary",
+                          "bg-gray-200 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-700 text-gray-900 dark:text-gray-100",
                       )}
                     >
                       <Link href={item.url}>
                         <item.icon
                           className={cn(
-                            isActive && "text-primary",
+                            isActive && "text-gray-900 dark:text-gray-100",
                             "mr-2 h-4 w-4",
                           )}
                         />
diff --git a/llama_stack/ui/components/layout/detail-layout.tsx b/llama_stack/ui/components/layout/detail-layout.tsx
index 58b912703..3013195a2 100644
--- a/llama_stack/ui/components/layout/detail-layout.tsx
+++ b/llama_stack/ui/components/layout/detail-layout.tsx
@@ -93,7 +93,9 @@ export function PropertyItem({
     >
       <strong>{label}:</strong>{" "}
       {typeof value === "string" || typeof value === "number" ? (
-        <span className="text-gray-900 font-medium">{value}</span>
+        <span className="text-gray-900 dark:text-gray-100 font-medium">
+          {value}
+        </span>
       ) : (
         value
       )}
@@ -112,7 +114,9 @@ export function PropertiesCard({ children }: PropertiesCardProps) {
         <CardTitle>Properties</CardTitle>
       </CardHeader>
       <CardContent>
-        <ul className="space-y-2 text-sm text-gray-600">{children}</ul>
+        <ul className="space-y-2 text-sm text-gray-600 dark:text-gray-400">
+          {children}
+        </ul>
       </CardContent>
     </Card>
   );
diff --git a/llama_stack/ui/components/ui/message-components.tsx b/llama_stack/ui/components/ui/message-components.tsx
index 50ccd623e..39cb570b7 100644
--- a/llama_stack/ui/components/ui/message-components.tsx
+++ b/llama_stack/ui/components/ui/message-components.tsx
@@ -17,10 +17,10 @@ export const MessageBlock: React.FC<MessageBlockProps> = ({
 }) => {
   return (
     <div className={`mb-4 ${className}`}>
-      <p className="py-1 font-semibold text-gray-800 mb-1">
+      <p className="py-1 font-semibold text-muted-foreground mb-1">
         {label}
         {labelDetail && (
-          <span className="text-xs text-gray-500 font-normal ml-1">
+          <span className="text-xs text-muted-foreground font-normal ml-1">
             {labelDetail}
           </span>
         )}
diff --git a/llama_stack/ui/components/vector-stores/vector-store-detail.tsx b/llama_stack/ui/components/vector-stores/vector-store-detail.tsx
new file mode 100644
index 000000000..7c5c91dd3
--- /dev/null
+++ b/llama_stack/ui/components/vector-stores/vector-store-detail.tsx
@@ -0,0 +1,137 @@
+"use client";
+
+import type { VectorStore } from "llama-stack-client/resources/vector-stores/vector-stores";
+import type { VectorStoreFile } from "llama-stack-client/resources/vector-stores/files";
+import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card";
+import { Skeleton } from "@/components/ui/skeleton";
+import {
+  DetailLoadingView,
+  DetailErrorView,
+  DetailNotFoundView,
+  DetailLayout,
+  PropertiesCard,
+  PropertyItem,
+} from "@/components/layout/detail-layout";
+import {
+  Table,
+  TableBody,
+  TableCaption,
+  TableCell,
+  TableHead,
+  TableHeader,
+  TableRow,
+} from "@/components/ui/table";
+
+interface VectorStoreDetailViewProps {
+  store: VectorStore | null;
+  files: VectorStoreFile[];
+  isLoadingStore: boolean;
+  isLoadingFiles: boolean;
+  errorStore: Error | null;
+  errorFiles: Error | null;
+  id: string;
+}
+
+/**
+ * Renders the detail view for a single vector store.
+ *
+ * Store-level error, loading and not-found states replace the whole view;
+ * otherwise the file table (with its own loading/error/empty states) is the
+ * main content and the store properties are shown in the sidebar.
+ */
+export function VectorStoreDetailView({
+  store,
+  files,
+  isLoadingStore,
+  isLoadingFiles,
+  errorStore,
+  errorFiles,
+  id,
+}: VectorStoreDetailViewProps) {
+  const title = "Vector Store Details";
+
+  if (errorStore) {
+    return <DetailErrorView title={title} id={id} error={errorStore} />;
+  }
+  if (isLoadingStore) {
+    return <DetailLoadingView title={title} />;
+  }
+  if (!store) {
+    return <DetailNotFoundView title={title} id={id} />;
+  }
+
+  const mainContent = (
+    <>
+      <Card>
+        <CardHeader>
+          <CardTitle>Files</CardTitle>
+        </CardHeader>
+        <CardContent>
+          {isLoadingFiles ? (
+            <Skeleton className="h-4 w-full" />
+          ) : errorFiles ? (
+            <div className="text-destructive text-sm">
+              Error loading files: {errorFiles.message}
+            </div>
+          ) : files.length > 0 ? (
+            <Table>
+              <TableCaption>Files in this vector store</TableCaption>
+              <TableHeader>
+                <TableRow>
+                  <TableHead>ID</TableHead>
+                  <TableHead>Status</TableHead>
+                  <TableHead>Created</TableHead>
+                  <TableHead>Usage Bytes</TableHead>
+                </TableRow>
+              </TableHeader>
+              <TableBody>
+                {files.map((file) => (
+                  <TableRow key={file.id}>
+                    <TableCell>{file.id}</TableCell>
+                    <TableCell>{file.status}</TableCell>
+                    <TableCell>
+                      {new Date(file.created_at * 1000).toLocaleString()}
+                    </TableCell>
+                    <TableCell>{file.usage_bytes}</TableCell>
+                  </TableRow>
+                ))}
+              </TableBody>
+            </Table>
+          ) : (
+            <p className="text-gray-500 italic text-sm">
+              No files in this vector store.
+            </p>
+          )}
+        </CardContent>
+      </Card>
+    </>
+  );
+
+  // `metadata` is read with optional chaining so a store without metadata
+  // renders empty provider fields instead of throwing.
+  const sidebar = (
+    <PropertiesCard>
+      <PropertyItem label="ID" value={store.id} />
+      <PropertyItem label="Name" value={store.name || ""} />
+      <PropertyItem
+        label="Created"
+        value={new Date(store.created_at * 1000).toLocaleString()}
+      />
+      <PropertyItem label="Status" value={store.status} />
+      <PropertyItem label="Total Files" value={store.file_counts.total} />
+      <PropertyItem label="Usage Bytes" value={store.usage_bytes} />
+      <PropertyItem
+        label="Provider ID"
+        value={(store.metadata?.provider_id as string) || ""}
+      />
+      <PropertyItem
+        label="Provider DB ID"
+        value={(store.metadata?.provider_vector_db_id as string) || ""}
+      />
+    </PropertiesCard>
+  );
+
+  return (
+    <DetailLayout title={title} mainContent={mainContent} sidebar={sidebar} />
+  );
+}
diff --git a/llama_stack/ui/package-lock.json b/llama_stack/ui/package-lock.json
index 8fd5fb56c..158569241 100644
--- a/llama_stack/ui/package-lock.json
+++ b/llama_stack/ui/package-lock.json
@@ -15,7 +15,7 @@
         "@radix-ui/react-tooltip": "^1.2.6",
         "class-variance-authority": "^0.7.1",
         "clsx": "^2.1.1",
-        "llama-stack-client": "0.2.13",
+        "llama-stack-client": "^0.2.14",
         "lucide-react": "^0.510.0",
         "next": "15.3.3",
         "next-auth": "^4.24.11",
@@ -676,406 +676,6 @@
         "tslib": "^2.4.0"
       }
     },
-    "node_modules/@esbuild/aix-ppc64": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.5.tgz",
-      "integrity": "sha512-9o3TMmpmftaCMepOdA5k/yDw8SfInyzWWTjYTFCX3kPSDJMROQTb8jg+h9Cnwnmm1vOzvxN7gIfB5V2ewpjtGA==",
-      "cpu": [
-        "ppc64"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "aix"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/android-arm": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.25.5.tgz",
-      "integrity": "sha512-AdJKSPeEHgi7/ZhuIPtcQKr5RQdo6OO2IL87JkianiMYMPbCtot9fxPbrMiBADOWWm3T2si9stAiVsGbTQFkbA==",
-      "cpu": [
-        "arm"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "android"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/android-arm64": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.25.5.tgz",
-      "integrity": "sha512-VGzGhj4lJO+TVGV1v8ntCZWJktV7SGCs3Pn1GRWI1SBFtRALoomm8k5E9Pmwg3HOAal2VDc2F9+PM/rEY6oIDg==",
-      "cpu": [
-        "arm64"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "android"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/android-x64": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.25.5.tgz",
-      "integrity": "sha512-D2GyJT1kjvO//drbRT3Hib9XPwQeWd9vZoBJn+bu/lVsOZ13cqNdDeqIF/xQ5/VmWvMduP6AmXvylO/PIc2isw==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "android"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/darwin-arm64": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.25.5.tgz",
-      "integrity": "sha512-GtaBgammVvdF7aPIgH2jxMDdivezgFu6iKpmT+48+F8Hhg5J/sfnDieg0aeG/jfSvkYQU2/pceFPDKlqZzwnfQ==",
-      "cpu": [
-        "arm64"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "darwin"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/darwin-x64": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.25.5.tgz",
-      "integrity": "sha512-1iT4FVL0dJ76/q1wd7XDsXrSW+oLoquptvh4CLR4kITDtqi2e/xwXwdCVH8hVHU43wgJdsq7Gxuzcs6Iq/7bxQ==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "darwin"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/freebsd-arm64": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.5.tgz",
-      "integrity": "sha512-nk4tGP3JThz4La38Uy/gzyXtpkPW8zSAmoUhK9xKKXdBCzKODMc2adkB2+8om9BDYugz+uGV7sLmpTYzvmz6Sw==",
-      "cpu": [
-        "arm64"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "freebsd"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/freebsd-x64": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.25.5.tgz",
-      "integrity": "sha512-PrikaNjiXdR2laW6OIjlbeuCPrPaAl0IwPIaRv+SMV8CiM8i2LqVUHFC1+8eORgWyY7yhQY+2U2fA55mBzReaw==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "freebsd"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/linux-arm": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.25.5.tgz",
-      "integrity": "sha512-cPzojwW2okgh7ZlRpcBEtsX7WBuqbLrNXqLU89GxWbNt6uIg78ET82qifUy3W6OVww6ZWobWub5oqZOVtwolfw==",
-      "cpu": [
-        "arm"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/linux-arm64": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.25.5.tgz",
-      "integrity": "sha512-Z9kfb1v6ZlGbWj8EJk9T6czVEjjq2ntSYLY2cw6pAZl4oKtfgQuS4HOq41M/BcoLPzrUbNd+R4BXFyH//nHxVg==",
-      "cpu": [
-        "arm64"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/linux-ia32": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.25.5.tgz",
-      "integrity": "sha512-sQ7l00M8bSv36GLV95BVAdhJ2QsIbCuCjh/uYrWiMQSUuV+LpXwIqhgJDcvMTj+VsQmqAHL2yYaasENvJ7CDKA==",
-      "cpu": [
-        "ia32"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/linux-loong64": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.25.5.tgz",
-      "integrity": "sha512-0ur7ae16hDUC4OL5iEnDb0tZHDxYmuQyhKhsPBV8f99f6Z9KQM02g33f93rNH5A30agMS46u2HP6qTdEt6Q1kg==",
-      "cpu": [
-        "loong64"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/linux-mips64el": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.25.5.tgz",
-      "integrity": "sha512-kB/66P1OsHO5zLz0i6X0RxlQ+3cu0mkxS3TKFvkb5lin6uwZ/ttOkP3Z8lfR9mJOBk14ZwZ9182SIIWFGNmqmg==",
-      "cpu": [
-        "mips64el"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/linux-ppc64": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.25.5.tgz",
-      "integrity": "sha512-UZCmJ7r9X2fe2D6jBmkLBMQetXPXIsZjQJCjgwpVDz+YMcS6oFR27alkgGv3Oqkv07bxdvw7fyB71/olceJhkQ==",
-      "cpu": [
-        "ppc64"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/linux-riscv64": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.25.5.tgz",
-      "integrity": "sha512-kTxwu4mLyeOlsVIFPfQo+fQJAV9mh24xL+y+Bm6ej067sYANjyEw1dNHmvoqxJUCMnkBdKpvOn0Ahql6+4VyeA==",
-      "cpu": [
-        "riscv64"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/linux-s390x": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.25.5.tgz",
-      "integrity": "sha512-K2dSKTKfmdh78uJ3NcWFiqyRrimfdinS5ErLSn3vluHNeHVnBAFWC8a4X5N+7FgVE1EjXS1QDZbpqZBjfrqMTQ==",
-      "cpu": [
-        "s390x"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/linux-x64": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.25.5.tgz",
-      "integrity": "sha512-uhj8N2obKTE6pSZ+aMUbqq+1nXxNjZIIjCjGLfsWvVpy7gKCOL6rsY1MhRh9zLtUtAI7vpgLMK6DxjO8Qm9lJw==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "linux"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/netbsd-arm64": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.5.tgz",
-      "integrity": "sha512-pwHtMP9viAy1oHPvgxtOv+OkduK5ugofNTVDilIzBLpoWAM16r7b/mxBvfpuQDpRQFMfuVr5aLcn4yveGvBZvw==",
-      "cpu": [
-        "arm64"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "netbsd"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/netbsd-x64": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.25.5.tgz",
-      "integrity": "sha512-WOb5fKrvVTRMfWFNCroYWWklbnXH0Q5rZppjq0vQIdlsQKuw6mdSihwSo4RV/YdQ5UCKKvBy7/0ZZYLBZKIbwQ==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "netbsd"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/openbsd-arm64": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.5.tgz",
-      "integrity": "sha512-7A208+uQKgTxHd0G0uqZO8UjK2R0DDb4fDmERtARjSHWxqMTye4Erz4zZafx7Di9Cv+lNHYuncAkiGFySoD+Mw==",
-      "cpu": [
-        "arm64"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "openbsd"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/openbsd-x64": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.25.5.tgz",
-      "integrity": "sha512-G4hE405ErTWraiZ8UiSoesH8DaCsMm0Cay4fsFWOOUcz8b8rC6uCvnagr+gnioEjWn0wC+o1/TAHt+It+MpIMg==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "openbsd"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/sunos-x64": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.25.5.tgz",
-      "integrity": "sha512-l+azKShMy7FxzY0Rj4RCt5VD/q8mG/e+mDivgspo+yL8zW7qEwctQ6YqKX34DTEleFAvCIUviCFX1SDZRSyMQA==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "sunos"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/win32-arm64": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.25.5.tgz",
-      "integrity": "sha512-O2S7SNZzdcFG7eFKgvwUEZ2VG9D/sn/eIiz8XRZ1Q/DO5a3s76Xv0mdBzVM5j5R639lXQmPmSo0iRpHqUUrsxw==",
-      "cpu": [
-        "arm64"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "win32"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/win32-ia32": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.25.5.tgz",
-      "integrity": "sha512-onOJ02pqs9h1iMJ1PQphR+VZv8qBMQ77Klcsqv9CNW2w6yLqoURLcgERAIurY6QE63bbLuqgP9ATqajFLK5AMQ==",
-      "cpu": [
-        "ia32"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "win32"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
-    "node_modules/@esbuild/win32-x64": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.25.5.tgz",
-      "integrity": "sha512-TXv6YnJ8ZMVdX+SXWVBo/0p8LTcrUYngpWjvm91TMjjBQii7Oz11Lw5lbDV5Y0TzuhSJHwiH4hEtC1I42mMS0g==",
-      "cpu": [
-        "x64"
-      ],
-      "license": "MIT",
-      "optional": true,
-      "os": [
-        "win32"
-      ],
-      "engines": {
-        "node": ">=18"
-      }
-    },
     "node_modules/@eslint-community/eslint-utils": {
       "version": "4.7.0",
       "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.7.0.tgz",
@@ -5999,46 +5599,6 @@
         "url": "https://github.com/sponsors/ljharb"
       }
     },
-    "node_modules/esbuild": {
-      "version": "0.25.5",
-      "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.5.tgz",
-      "integrity": "sha512-P8OtKZRv/5J5hhz0cUAdu/cLuPIKXpQl1R9pZtvmHWQvrAUVd0UNIPT4IB4W3rNOqVO0rlqHmCIbSwxh/c9yUQ==",
-      "hasInstallScript": true,
-      "license": "MIT",
-      "bin": {
-        "esbuild": "bin/esbuild"
-      },
-      "engines": {
-        "node": ">=18"
-      },
-      "optionalDependencies": {
-        "@esbuild/aix-ppc64": "0.25.5",
-        "@esbuild/android-arm": "0.25.5",
-        "@esbuild/android-arm64": "0.25.5",
-        "@esbuild/android-x64": "0.25.5",
-        "@esbuild/darwin-arm64": "0.25.5",
-        "@esbuild/darwin-x64": "0.25.5",
-        "@esbuild/freebsd-arm64": "0.25.5",
-        "@esbuild/freebsd-x64": "0.25.5",
-        "@esbuild/linux-arm": "0.25.5",
-        "@esbuild/linux-arm64": "0.25.5",
-        "@esbuild/linux-ia32": "0.25.5",
-        "@esbuild/linux-loong64": "0.25.5",
-        "@esbuild/linux-mips64el": "0.25.5",
-        "@esbuild/linux-ppc64": "0.25.5",
-        "@esbuild/linux-riscv64": "0.25.5",
-        "@esbuild/linux-s390x": "0.25.5",
-        "@esbuild/linux-x64": "0.25.5",
-        "@esbuild/netbsd-arm64": "0.25.5",
-        "@esbuild/netbsd-x64": "0.25.5",
-        "@esbuild/openbsd-arm64": "0.25.5",
-        "@esbuild/openbsd-x64": "0.25.5",
-        "@esbuild/sunos-x64": "0.25.5",
-        "@esbuild/win32-arm64": "0.25.5",
-        "@esbuild/win32-ia32": "0.25.5",
-        "@esbuild/win32-x64": "0.25.5"
-      }
-    },
     "node_modules/escalade": {
       "version": "3.2.0",
       "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz",
@@ -6993,6 +6553,7 @@
       "version": "2.3.3",
       "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
       "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==",
+      "dev": true,
       "hasInstallScript": true,
       "license": "MIT",
       "optional": true,
@@ -7154,6 +6715,7 @@
       "version": "4.10.0",
       "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.10.0.tgz",
       "integrity": "sha512-kGzZ3LWWQcGIAmg6iWvXn0ei6WDtV26wzHRMwDSzmAbcXrTEXxHy6IehI6/4eT6VRKyMP1eF1VqwrVUmE/LR7A==",
+      "dev": true,
       "license": "MIT",
       "dependencies": {
         "resolve-pkg-maps": "^1.0.0"
@@ -9537,9 +9099,10 @@
       "license": "MIT"
     },
     "node_modules/llama-stack-client": {
-      "version": "0.2.13",
-      "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.13.tgz",
-      "integrity": "sha512-R1rTFLwgUimr+KjEUkzUvFL6vLASwS9qj3UDSVkJ5BmrKAs5GwVAMeL7yZaTBXGuPUVh124WSlC4d9H0FjWqLA==",
+      "version": "0.2.14",
+      "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.14.tgz",
+      "integrity": "sha512-bVU3JHp+EPEKR0Vb9vcd9ZyQj/72jSDuptKLwOXET9WrkphIQ8xuW5ueecMTgq8UEls3lwB3HiZM2cDOR9eDsQ==",
+      "license": "Apache-2.0",
       "dependencies": {
         "@types/node": "^18.11.18",
         "@types/node-fetch": "^2.6.4",
@@ -9547,8 +9110,7 @@
         "agentkeepalive": "^4.2.1",
         "form-data-encoder": "1.7.2",
         "formdata-node": "^4.3.2",
-        "node-fetch": "^2.6.7",
-        "tsx": "^4.19.2"
+        "node-fetch": "^2.6.7"
       }
     },
     "node_modules/llama-stack-client/node_modules/@types/node": {
@@ -11148,6 +10710,7 @@
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz",
       "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==",
+      "dev": true,
       "license": "MIT",
       "funding": {
         "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1"
@@ -12198,25 +11761,6 @@
       "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
       "license": "0BSD"
     },
-    "node_modules/tsx": {
-      "version": "4.19.4",
-      "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.19.4.tgz",
-      "integrity": "sha512-gK5GVzDkJK1SI1zwHf32Mqxf2tSJkNx+eYcNly5+nHvWqXUJYUkWBQtKauoESz3ymezAI++ZwT855x5p5eop+Q==",
-      "license": "MIT",
-      "dependencies": {
-        "esbuild": "~0.25.0",
-        "get-tsconfig": "^4.7.5"
-      },
-      "bin": {
-        "tsx": "dist/cli.mjs"
-      },
-      "engines": {
-        "node": ">=18.0.0"
-      },
-      "optionalDependencies": {
-        "fsevents": "~2.3.3"
-      }
-    },
     "node_modules/tw-animate-css": {
       "version": "1.2.9",
       "resolved": "https://registry.npmjs.org/tw-animate-css/-/tw-animate-css-1.2.9.tgz",
diff --git a/llama_stack/ui/package.json b/llama_stack/ui/package.json
index 9524ce0a5..b38efe309 100644
--- a/llama_stack/ui/package.json
+++ b/llama_stack/ui/package.json
@@ -20,7 +20,7 @@
     "@radix-ui/react-tooltip": "^1.2.6",
     "class-variance-authority": "^0.7.1",
     "clsx": "^2.1.1",
-    "llama-stack-client": "0.2.13",
+    "llama-stack-client": "^0.2.14",
     "lucide-react": "^0.510.0",
     "next": "15.3.3",
     "next-auth": "^4.24.11",
diff --git a/pyproject.toml b/pyproject.toml
index d84a823a3..2974ff996 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,8 +42,8 @@ dependencies = [
     "h11>=0.16.0",
     "python-multipart>=0.0.20",               # For fastapi Form
     "uvicorn>=0.34.0",                        # server
-    "opentelemetry-sdk",                      # server
-    "opentelemetry-exporter-otlp-proto-http", # server
+    "opentelemetry-sdk>=1.30.0",              # server
+    "opentelemetry-exporter-otlp-proto-http>=1.30.0", # server
     "aiosqlite>=0.21.0",                      # server - for metadata store
     "asyncpg",                                # for metadata store
 ]
@@ -58,12 +58,13 @@ ui = [
 
 [dependency-groups]
 dev = [
-    "pytest",
+    "pytest>=8.4",
     "pytest-timeout",
-    "pytest-asyncio",
+    "pytest-asyncio>=1.0",
     "pytest-cov",
     "pytest-html",
     "pytest-json-report",
+    "pytest-socket",      # For blocking network access in unit tests
     "nbval",              # For notebook testing
     "black",
     "ruff",
@@ -87,6 +88,8 @@ unit = [
     "blobfile",
     "faiss-cpu",
     "pymilvus>=2.5.12",
+    "litellm",
+    "together",
 ]
 # These are the core dependencies required for running integration tests. They are shared across all
 # providers. If a provider requires additional dependencies, please add them to your environment
@@ -339,3 +342,9 @@ warn_required_dynamic_aliases = true
 
 [tool.ruff.lint.pep8-naming]
 classmethod-decorators = ["classmethod", "pydantic.field_validator"]
+
+[tool.pytest.ini_options]
+asyncio_mode = "auto"
+markers = [
+    "allow_network: Allow network access for specific unit tests",
+]
diff --git a/scripts/unit-tests.sh b/scripts/unit-tests.sh
index 1fc3ff7fc..68d6458fc 100755
--- a/scripts/unit-tests.sh
+++ b/scripts/unit-tests.sh
@@ -16,4 +16,4 @@ if [ $FOUND_PYTHON -ne 0 ]; then
      uv python install "$PYTHON_VERSION"
 fi
 
-uv run --python "$PYTHON_VERSION" --with-editable . --group unit pytest --asyncio-mode=auto -s -v tests/unit/ $@
+uv run --python "$PYTHON_VERSION" --with-editable . --group unit pytest -s -v tests/unit/ "$@"
diff --git a/tests/integration/agents/test_persistence.py b/tests/integration/agents/test_persistence.py
index ef35c97a5..49d9d42d0 100644
--- a/tests/integration/agents/test_persistence.py
+++ b/tests/integration/agents/test_persistence.py
@@ -44,7 +44,6 @@ def common_params(inference_model):
     )
 
 
-@pytest.mark.asyncio
 @pytest.mark.skip(reason="This test needs to be migrated to api / client-sdk world")
 async def test_delete_agents_and_sessions(self, agents_stack, common_params):
     agents_impl = agents_stack.impls[Api.agents]
@@ -73,7 +72,6 @@ async def test_delete_agents_and_sessions(self, agents_stack, common_params):
     assert agent_response is None
 
 
-@pytest.mark.asyncio
 @pytest.mark.skip(reason="This test needs to be migrated to api / client-sdk world")
 async def test_get_agent_turns_and_steps(self, agents_stack, sample_messages, common_params):
     agents_impl = agents_stack.impls[Api.agents]
diff --git a/tests/integration/inference/test_openai_completion.py b/tests/integration/inference/test_openai_completion.py
index 05aee5096..e82714ffd 100644
--- a/tests/integration/inference/test_openai_completion.py
+++ b/tests/integration/inference/test_openai_completion.py
@@ -71,7 +71,6 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, mode
         "remote::cerebras",
         "remote::databricks",
         "remote::runpod",
-        "remote::sambanova",
         "remote::tgi",
     ):
         pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI chat completions.")
diff --git a/tests/integration/inspect/test_inspect.py b/tests/integration/inspect/test_inspect.py
index da704178d..1597a319b 100644
--- a/tests/integration/inspect/test_inspect.py
+++ b/tests/integration/inspect/test_inspect.py
@@ -4,20 +4,17 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-import pytest
 from llama_stack_client import LlamaStackClient
 
 from llama_stack import LlamaStackAsLibraryClient
 
 
 class TestInspect:
-    @pytest.mark.asyncio
     def test_health(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient):
         health = llama_stack_client.inspect.health()
         assert health is not None
         assert health.status == "OK"
 
-    @pytest.mark.asyncio
     def test_version(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient):
         version = llama_stack_client.inspect.version()
         assert version is not None
diff --git a/tests/integration/providers/test_providers.py b/tests/integration/providers/test_providers.py
index 8b153411c..fc65e2a10 100644
--- a/tests/integration/providers/test_providers.py
+++ b/tests/integration/providers/test_providers.py
@@ -4,14 +4,12 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-import pytest
 from llama_stack_client import LlamaStackClient
 
 from llama_stack import LlamaStackAsLibraryClient
 
 
 class TestProviders:
-    @pytest.mark.asyncio
     def test_providers(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient):
         provider_list = llama_stack_client.providers.list()
         assert provider_list is not None
diff --git a/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py b/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py
index bf6077532..c32d6cd17 100644
--- a/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py
+++ b/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py
@@ -88,7 +88,6 @@ async def cleanup_records(sql_store, table_name, record_ids):
             pass
 
 
-@pytest.mark.asyncio
 @pytest.mark.parametrize("backend_config", BACKEND_CONFIGS)
 @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user")
 async def test_authorized_store_attributes(mock_get_authenticated_user, authorized_store, request):
@@ -183,7 +182,6 @@ async def test_authorized_store_attributes(mock_get_authenticated_user, authoriz
         await cleanup_records(authorized_store.sql_store, table_name, ["1", "2", "3", "4", "5", "6"])
 
 
-@pytest.mark.asyncio
 @pytest.mark.parametrize("backend_config", BACKEND_CONFIGS)
 @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user")
 async def test_user_ownership_policy(mock_get_authenticated_user, authorized_store, request):
diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py
index aedac0386..b5eb1217d 100644
--- a/tests/unit/conftest.py
+++ b/tests/unit/conftest.py
@@ -4,6 +4,17 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
+import pytest_socket
+
 # We need to import the fixtures here so that pytest can find them
 # but ruff doesn't think they are used and removes the import. "noqa: F401" prevents them from being removed
 from .fixtures import cached_disk_dist_registry, disk_dist_registry, sqlite_kvstore  # noqa: F401
+
+
+def pytest_runtest_setup(item):
+    """Per-test hook: open sockets only for tests marked ``allow_network``."""
+    if "allow_network" not in item.keywords:
+        # Unix sockets stay permitted; some tests rely on local servers and mocks
+        pytest_socket.disable_socket(allow_unix_socket=True)
+    else:
+        pytest_socket.enable_socket()
diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py
index 0eeb68167..3ba042bd9 100644
--- a/tests/unit/distribution/routers/test_routing_tables.py
+++ b/tests/unit/distribution/routers/test_routing_tables.py
@@ -8,8 +8,6 @@
 
 from unittest.mock import AsyncMock
 
-import pytest
-
 from llama_stack.apis.common.type_system import NumberType
 from llama_stack.apis.datasets.datasets import Dataset, DatasetPurpose, URIDataSource
 from llama_stack.apis.datatypes import Api
@@ -119,7 +117,6 @@ class ToolGroupsImpl(Impl):
         )
 
 
-@pytest.mark.asyncio
 async def test_models_routing_table(cached_disk_dist_registry):
     table = ModelsRoutingTable({"test_provider": InferenceImpl()}, cached_disk_dist_registry, {})
     await table.initialize()
@@ -161,7 +158,6 @@ async def test_models_routing_table(cached_disk_dist_registry):
     assert len(openai_models.data) == 0
 
 
-@pytest.mark.asyncio
 async def test_shields_routing_table(cached_disk_dist_registry):
     table = ShieldsRoutingTable({"test_provider": SafetyImpl()}, cached_disk_dist_registry, {})
     await table.initialize()
@@ -177,7 +173,6 @@ async def test_shields_routing_table(cached_disk_dist_registry):
     assert "test-shield-2" in shield_ids
 
 
-@pytest.mark.asyncio
 async def test_vectordbs_routing_table(cached_disk_dist_registry):
     table = VectorDBsRoutingTable({"test_provider": VectorDBImpl()}, cached_disk_dist_registry, {})
     await table.initialize()
@@ -233,7 +228,6 @@ async def test_datasets_routing_table(cached_disk_dist_registry):
     assert len(datasets.data) == 0
 
 
-@pytest.mark.asyncio
 async def test_scoring_functions_routing_table(cached_disk_dist_registry):
     table = ScoringFunctionsRoutingTable({"test_provider": ScoringFunctionsImpl()}, cached_disk_dist_registry, {})
     await table.initialize()
@@ -259,7 +253,6 @@ async def test_scoring_functions_routing_table(cached_disk_dist_registry):
     assert "test-scoring-fn-2" in scoring_fn_ids
 
 
-@pytest.mark.asyncio
 async def test_benchmarks_routing_table(cached_disk_dist_registry):
     table = BenchmarksRoutingTable({"test_provider": BenchmarksImpl()}, cached_disk_dist_registry, {})
     await table.initialize()
@@ -277,7 +270,6 @@ async def test_benchmarks_routing_table(cached_disk_dist_registry):
     assert "test-benchmark" in benchmark_ids
 
 
-@pytest.mark.asyncio
 async def test_tool_groups_routing_table(cached_disk_dist_registry):
     table = ToolGroupsRoutingTable({"test_provider": ToolGroupsImpl()}, cached_disk_dist_registry, {})
     await table.initialize()
diff --git a/tests/unit/distribution/test_context.py b/tests/unit/distribution/test_context.py
index 84944bfe8..7914be51d 100644
--- a/tests/unit/distribution/test_context.py
+++ b/tests/unit/distribution/test_context.py
@@ -13,7 +13,6 @@ import pytest
 from llama_stack.distribution.utils.context import preserve_contexts_async_generator
 
 
-@pytest.mark.asyncio
 async def test_preserve_contexts_with_exception():
     # Create context variable
     context_var = ContextVar("exception_var", default="initial")
@@ -41,7 +40,6 @@ async def test_preserve_contexts_with_exception():
     context_var.reset(token)
 
 
-@pytest.mark.asyncio
 async def test_preserve_contexts_empty_generator():
     # Create context variable
     context_var = ContextVar("empty_var", default="initial")
@@ -66,7 +64,6 @@ async def test_preserve_contexts_empty_generator():
     context_var.reset(token)
 
 
-@pytest.mark.asyncio
 async def test_preserve_contexts_across_event_loops():
     """
     Test that context variables are preserved across event loop boundaries with nested generators.
diff --git a/tests/unit/files/test_files.py b/tests/unit/files/test_files.py
index ef1dc9743..785077e91 100644
--- a/tests/unit/files/test_files.py
+++ b/tests/unit/files/test_files.py
@@ -6,7 +6,6 @@
 
 
 import pytest
-import pytest_asyncio
 
 from llama_stack.apis.common.responses import Order
 from llama_stack.apis.files import OpenAIFilePurpose
@@ -29,7 +28,7 @@ class MockUploadFile:
         return self.content
 
 
-@pytest_asyncio.fixture
+@pytest.fixture
 async def files_provider(tmp_path):
     """Create a files provider with temporary storage for testing."""
     storage_dir = tmp_path / "files"
@@ -68,7 +67,6 @@ def large_file():
 class TestOpenAIFilesAPI:
     """Test suite for OpenAI Files API endpoints."""
 
-    @pytest.mark.asyncio
     async def test_upload_file_success(self, files_provider, sample_text_file):
         """Test successful file upload."""
         # Upload file
@@ -82,7 +80,6 @@ class TestOpenAIFilesAPI:
         assert result.created_at > 0
         assert result.expires_at > result.created_at
 
-    @pytest.mark.asyncio
     async def test_upload_different_purposes(self, files_provider, sample_text_file):
         """Test uploading files with different purposes."""
         purposes = list(OpenAIFilePurpose)
@@ -93,7 +90,6 @@ class TestOpenAIFilesAPI:
             uploaded_files.append(result)
             assert result.purpose == purpose
 
-    @pytest.mark.asyncio
     async def test_upload_different_file_types(self, files_provider, sample_text_file, sample_json_file, large_file):
         """Test uploading different types and sizes of files."""
         files_to_test = [
@@ -107,7 +103,6 @@ class TestOpenAIFilesAPI:
             assert result.filename == expected_filename
             assert result.bytes == len(file_obj.content)
 
-    @pytest.mark.asyncio
     async def test_list_files_empty(self, files_provider):
         """Test listing files when no files exist."""
         result = await files_provider.openai_list_files()
@@ -117,7 +112,6 @@ class TestOpenAIFilesAPI:
         assert result.first_id == ""
         assert result.last_id == ""
 
-    @pytest.mark.asyncio
     async def test_list_files_with_content(self, files_provider, sample_text_file, sample_json_file):
         """Test listing files when files exist."""
         # Upload multiple files
@@ -132,7 +126,6 @@ class TestOpenAIFilesAPI:
         assert file1.id in file_ids
         assert file2.id in file_ids
 
-    @pytest.mark.asyncio
     async def test_list_files_with_purpose_filter(self, files_provider, sample_text_file):
         """Test listing files with purpose filtering."""
         # Upload file with specific purpose
@@ -146,7 +139,6 @@ class TestOpenAIFilesAPI:
         assert result.data[0].id == uploaded_file.id
         assert result.data[0].purpose == OpenAIFilePurpose.ASSISTANTS
 
-    @pytest.mark.asyncio
     async def test_list_files_with_limit(self, files_provider, sample_text_file):
         """Test listing files with limit parameter."""
         # Upload multiple files
@@ -157,7 +149,6 @@ class TestOpenAIFilesAPI:
         result = await files_provider.openai_list_files(limit=3)
         assert len(result.data) == 3
 
-    @pytest.mark.asyncio
     async def test_list_files_with_order(self, files_provider, sample_text_file):
         """Test listing files with different order."""
         # Upload multiple files
@@ -178,7 +169,6 @@ class TestOpenAIFilesAPI:
         # Oldest should be first
         assert result_asc.data[0].created_at <= result_asc.data[1].created_at <= result_asc.data[2].created_at
 
-    @pytest.mark.asyncio
     async def test_retrieve_file_success(self, files_provider, sample_text_file):
         """Test successful file retrieval."""
         # Upload file
@@ -197,13 +187,11 @@ class TestOpenAIFilesAPI:
         assert retrieved_file.created_at == uploaded_file.created_at
         assert retrieved_file.expires_at == uploaded_file.expires_at
 
-    @pytest.mark.asyncio
     async def test_retrieve_file_not_found(self, files_provider):
         """Test retrieving a non-existent file."""
         with pytest.raises(ValueError, match="File with id file-nonexistent not found"):
             await files_provider.openai_retrieve_file("file-nonexistent")
 
-    @pytest.mark.asyncio
     async def test_retrieve_file_content_success(self, files_provider, sample_text_file):
         """Test successful file content retrieval."""
         # Upload file
@@ -217,13 +205,11 @@ class TestOpenAIFilesAPI:
         # Verify content
         assert content.body == sample_text_file.content
 
-    @pytest.mark.asyncio
     async def test_retrieve_file_content_not_found(self, files_provider):
         """Test retrieving content of a non-existent file."""
         with pytest.raises(ValueError, match="File with id file-nonexistent not found"):
             await files_provider.openai_retrieve_file_content("file-nonexistent")
 
-    @pytest.mark.asyncio
     async def test_delete_file_success(self, files_provider, sample_text_file):
         """Test successful file deletion."""
         # Upload file
@@ -245,13 +231,11 @@ class TestOpenAIFilesAPI:
         with pytest.raises(ValueError, match=f"File with id {uploaded_file.id} not found"):
             await files_provider.openai_retrieve_file(uploaded_file.id)
 
-    @pytest.mark.asyncio
     async def test_delete_file_not_found(self, files_provider):
         """Test deleting a non-existent file."""
         with pytest.raises(ValueError, match="File with id file-nonexistent not found"):
             await files_provider.openai_delete_file("file-nonexistent")
 
-    @pytest.mark.asyncio
     async def test_file_persistence_across_operations(self, files_provider, sample_text_file):
         """Test that files persist correctly across multiple operations."""
         # Upload file
@@ -279,7 +263,6 @@ class TestOpenAIFilesAPI:
         files_list = await files_provider.openai_list_files()
         assert len(files_list.data) == 0
 
-    @pytest.mark.asyncio
     async def test_multiple_files_operations(self, files_provider, sample_text_file, sample_json_file):
         """Test operations with multiple files."""
         # Upload multiple files
@@ -302,7 +285,6 @@ class TestOpenAIFilesAPI:
         content = await files_provider.openai_retrieve_file_content(file2.id)
         assert content.body == sample_json_file.content
 
-    @pytest.mark.asyncio
     async def test_file_id_uniqueness(self, files_provider, sample_text_file):
         """Test that each uploaded file gets a unique ID."""
         file_ids = set()
@@ -316,7 +298,6 @@ class TestOpenAIFilesAPI:
             file_ids.add(uploaded_file.id)
             assert uploaded_file.id.startswith("file-")
 
-    @pytest.mark.asyncio
     async def test_file_no_filename_handling(self, files_provider):
         """Test handling files with no filename."""
         file_without_name = MockUploadFile(b"content", None)  # No filename
@@ -327,7 +308,6 @@ class TestOpenAIFilesAPI:
 
         assert uploaded_file.filename == "uploaded_file"  # Default filename
 
-    @pytest.mark.asyncio
     async def test_after_pagination_works(self, files_provider, sample_text_file):
         """Test that 'after' pagination works correctly."""
         # Upload multiple files to test pagination
diff --git a/tests/unit/fixtures.py b/tests/unit/fixtures.py
index 4e50c5e08..7174d2e78 100644
--- a/tests/unit/fixtures.py
+++ b/tests/unit/fixtures.py
@@ -4,14 +4,14 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-import pytest_asyncio
+import pytest
 
 from llama_stack.distribution.store.registry import CachedDiskDistributionRegistry, DiskDistributionRegistry
 from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
 from llama_stack.providers.utils.kvstore.sqlite import SqliteKVStoreImpl
 
 
-@pytest_asyncio.fixture(scope="function")
+@pytest.fixture(scope="function")
 async def sqlite_kvstore(tmp_path):
     db_path = tmp_path / "test_kv.db"
     kvstore_config = SqliteKVStoreConfig(db_path=db_path.as_posix())
@@ -20,14 +20,14 @@ async def sqlite_kvstore(tmp_path):
     yield kvstore
 
 
-@pytest_asyncio.fixture(scope="function")
+@pytest.fixture(scope="function")
 async def disk_dist_registry(sqlite_kvstore):
     registry = DiskDistributionRegistry(sqlite_kvstore)
     await registry.initialize()
     yield registry
 
 
-@pytest_asyncio.fixture(scope="function")
+@pytest.fixture(scope="function")
 async def cached_disk_dist_registry(sqlite_kvstore):
     registry = CachedDiskDistributionRegistry(sqlite_kvstore)
     await registry.initialize()
diff --git a/tests/unit/providers/agent/test_meta_reference_agent.py b/tests/unit/providers/agent/test_meta_reference_agent.py
index 7a7d52892..c06d9ab0e 100644
--- a/tests/unit/providers/agent/test_meta_reference_agent.py
+++ b/tests/unit/providers/agent/test_meta_reference_agent.py
@@ -8,7 +8,6 @@ from datetime import datetime
 from unittest.mock import AsyncMock
 
 import pytest
-import pytest_asyncio
 
 from llama_stack.apis.agents import (
     Agent,
@@ -50,7 +49,7 @@ def config(tmp_path):
     )
 
 
-@pytest_asyncio.fixture
+@pytest.fixture
 async def agents_impl(config, mock_apis):
     impl = MetaReferenceAgentsImpl(
         config,
@@ -117,7 +116,6 @@ def sample_agent_config():
     )
 
 
-@pytest.mark.asyncio
 async def test_create_agent(agents_impl, sample_agent_config):
     response = await agents_impl.create_agent(sample_agent_config)
 
@@ -132,7 +130,6 @@ async def test_create_agent(agents_impl, sample_agent_config):
     assert isinstance(agent_info.created_at, datetime)
 
 
-@pytest.mark.asyncio
 async def test_get_agent(agents_impl, sample_agent_config):
     create_response = await agents_impl.create_agent(sample_agent_config)
     agent_id = create_response.agent_id
@@ -146,7 +143,6 @@ async def test_get_agent(agents_impl, sample_agent_config):
     assert isinstance(agent.created_at, datetime)
 
 
-@pytest.mark.asyncio
 async def test_list_agents(agents_impl, sample_agent_config):
     agent1_response = await agents_impl.create_agent(sample_agent_config)
     agent2_response = await agents_impl.create_agent(sample_agent_config)
@@ -160,7 +156,6 @@ async def test_list_agents(agents_impl, sample_agent_config):
     assert agent2_response.agent_id in agent_ids
 
 
-@pytest.mark.asyncio
 @pytest.mark.parametrize("enable_session_persistence", [True, False])
 async def test_create_agent_session_persistence(agents_impl, sample_agent_config, enable_session_persistence):
     # Create an agent with specified persistence setting
@@ -188,7 +183,6 @@ async def test_create_agent_session_persistence(agents_impl, sample_agent_config
         await agents_impl.get_agents_session(agent_id, session_response.session_id)
 
 
-@pytest.mark.asyncio
 @pytest.mark.parametrize("enable_session_persistence", [True, False])
 async def test_list_agent_sessions_persistence(agents_impl, sample_agent_config, enable_session_persistence):
     # Create an agent with specified persistence setting
@@ -221,7 +215,6 @@ async def test_list_agent_sessions_persistence(agents_impl, sample_agent_config,
     assert session2.session_id in {s["session_id"] for s in sessions.data}
 
 
-@pytest.mark.asyncio
 async def test_delete_agent(agents_impl, sample_agent_config):
     # Create an agent
     response = await agents_impl.create_agent(sample_agent_config)
diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
index 0d1ef8eca..6485e3512 100644
--- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py
+++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py
@@ -122,7 +122,6 @@ async def fake_stream(fixture: str = "simple_chat_completion.yaml"):
     )
 
 
-@pytest.mark.asyncio
 async def test_create_openai_response_with_string_input(openai_responses_impl, mock_inference_api):
     """Test creating an OpenAI response with a simple string input."""
     # Setup
@@ -155,7 +154,6 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
     assert result.output[0].content[0].text == "Dublin"
 
 
-@pytest.mark.asyncio
 async def test_create_openai_response_with_string_input_with_tools(openai_responses_impl, mock_inference_api):
     """Test creating an OpenAI response with a simple string input and tools."""
     # Setup
@@ -224,7 +222,6 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon
         assert result.output[1].content[0].annotations == []
 
 
-@pytest.mark.asyncio
 async def test_create_openai_response_with_tool_call_type_none(openai_responses_impl, mock_inference_api):
     """Test creating an OpenAI response with a tool call response that has a type of None."""
     # Setup
@@ -294,7 +291,6 @@ async def test_create_openai_response_with_tool_call_type_none(openai_responses_
     assert chunks[1].response.output[0].name == "get_weather"
 
 
-@pytest.mark.asyncio
 async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api):
     """Test creating an OpenAI response with multiple messages."""
     # Setup
@@ -340,7 +336,6 @@ async def test_create_openai_response_with_multiple_messages(openai_responses_im
             assert isinstance(inference_messages[i], OpenAIDeveloperMessageParam)
 
 
-@pytest.mark.asyncio
 async def test_prepend_previous_response_none(openai_responses_impl):
     """Test prepending no previous response to a new response."""
 
@@ -348,7 +343,6 @@ async def test_prepend_previous_response_none(openai_responses_impl):
     assert input == "fake_input"
 
 
-@pytest.mark.asyncio
 async def test_prepend_previous_response_basic(openai_responses_impl, mock_responses_store):
     """Test prepending a basic previous response to a new response."""
 
@@ -388,7 +382,6 @@ async def test_prepend_previous_response_basic(openai_responses_impl, mock_respo
     assert input[2].content == "fake_input"
 
 
-@pytest.mark.asyncio
 async def test_prepend_previous_response_web_search(openai_responses_impl, mock_responses_store):
     """Test prepending a web search previous response to a new response."""
     input_item_message = OpenAIResponseMessage(
@@ -434,7 +427,6 @@ async def test_prepend_previous_response_web_search(openai_responses_impl, mock_
     assert input[3].content == "fake_input"
 
 
-@pytest.mark.asyncio
 async def test_create_openai_response_with_instructions(openai_responses_impl, mock_inference_api):
     # Setup
     input_text = "What is the capital of Ireland?"
@@ -463,7 +455,6 @@ async def test_create_openai_response_with_instructions(openai_responses_impl, m
     assert sent_messages[1].content == input_text
 
 
-@pytest.mark.asyncio
 async def test_create_openai_response_with_instructions_and_multiple_messages(
     openai_responses_impl, mock_inference_api
 ):
@@ -508,7 +499,6 @@ async def test_create_openai_response_with_instructions_and_multiple_messages(
     assert sent_messages[3].content == "Which is the largest?"
 
 
-@pytest.mark.asyncio
 async def test_create_openai_response_with_instructions_and_previous_response(
     openai_responses_impl, mock_responses_store, mock_inference_api
 ):
@@ -565,7 +555,6 @@ async def test_create_openai_response_with_instructions_and_previous_response(
     assert sent_messages[3].content == "Which is the largest?"
 
 
-@pytest.mark.asyncio
 async def test_list_openai_response_input_items_delegation(openai_responses_impl, mock_responses_store):
     """Test that list_openai_response_input_items properly delegates to responses_store with correct parameters."""
     # Setup
@@ -601,7 +590,6 @@ async def test_list_openai_response_input_items_delegation(openai_responses_impl
     assert result.data[0].id == "msg_123"
 
 
-@pytest.mark.asyncio
 async def test_responses_store_list_input_items_logic():
     """Test ResponsesStore list_response_input_items logic - mocks get_response_object to test actual ordering/limiting."""
 
@@ -680,7 +668,6 @@ async def test_responses_store_list_input_items_logic():
     assert len(result.data) == 0  # Should return no items
 
 
-@pytest.mark.asyncio
 async def test_store_response_uses_rehydrated_input_with_previous_response(
     openai_responses_impl, mock_responses_store, mock_inference_api
 ):
@@ -747,7 +734,6 @@ async def test_store_response_uses_rehydrated_input_with_previous_response(
     assert result.status == "completed"
 
 
-@pytest.mark.asyncio
 @pytest.mark.parametrize(
     "text_format, response_format",
     [
@@ -787,7 +773,6 @@ async def test_create_openai_response_with_text_format(
     assert first_call.kwargs["response_format"] == response_format
 
 
-@pytest.mark.asyncio
 async def test_create_openai_response_with_invalid_text_format(openai_responses_impl, mock_inference_api):
     """Test creating an OpenAI response with an invalid text format."""
     # Setup
diff --git a/tests/unit/providers/agents/test_persistence_access_control.py b/tests/unit/providers/agents/test_persistence_access_control.py
index 656d1e53c..26001fcf1 100644
--- a/tests/unit/providers/agents/test_persistence_access_control.py
+++ b/tests/unit/providers/agents/test_persistence_access_control.py
@@ -9,7 +9,6 @@ from datetime import datetime
 from unittest.mock import patch
 
 import pytest
-import pytest_asyncio
 
 from llama_stack.apis.agents import Turn
 from llama_stack.apis.inference import CompletionMessage, StopReason
@@ -17,13 +16,12 @@ from llama_stack.distribution.datatypes import User
 from llama_stack.providers.inline.agents.meta_reference.persistence import AgentPersistence, AgentSessionInfo
 
 
-@pytest_asyncio.fixture
+@pytest.fixture
 async def test_setup(sqlite_kvstore):
     agent_persistence = AgentPersistence(agent_id="test_agent", kvstore=sqlite_kvstore, policy={})
     yield agent_persistence
 
 
-@pytest.mark.asyncio
 @patch("llama_stack.providers.inline.agents.meta_reference.persistence.get_authenticated_user")
 async def test_session_creation_with_access_attributes(mock_get_authenticated_user, test_setup):
     agent_persistence = test_setup
@@ -44,7 +42,6 @@ async def test_session_creation_with_access_attributes(mock_get_authenticated_us
     assert session_info.owner.attributes["teams"] == ["ai-team"]
 
 
-@pytest.mark.asyncio
 @patch("llama_stack.providers.inline.agents.meta_reference.persistence.get_authenticated_user")
 async def test_session_access_control(mock_get_authenticated_user, test_setup):
     agent_persistence = test_setup
@@ -79,7 +76,6 @@ async def test_session_access_control(mock_get_authenticated_user, test_setup):
     assert retrieved_session is None
 
 
-@pytest.mark.asyncio
 @patch("llama_stack.providers.inline.agents.meta_reference.persistence.get_authenticated_user")
 async def test_turn_access_control(mock_get_authenticated_user, test_setup):
     agent_persistence = test_setup
@@ -133,7 +129,6 @@ async def test_turn_access_control(mock_get_authenticated_user, test_setup):
         await agent_persistence.get_session_turns(session_id)
 
 
-@pytest.mark.asyncio
 @patch("llama_stack.providers.inline.agents.meta_reference.persistence.get_authenticated_user")
 async def test_tool_call_and_infer_iters_access_control(mock_get_authenticated_user, test_setup):
     agent_persistence = test_setup
diff --git a/tests/unit/providers/inference/test_inference_client_caching.py b/tests/unit/providers/inference/test_inference_client_caching.py
new file mode 100644
index 000000000..c9a931d47
--- /dev/null
+++ b/tests/unit/providers/inference/test_inference_client_caching.py
@@ -0,0 +1,73 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import json
+from unittest.mock import MagicMock
+
+from llama_stack.distribution.request_headers import request_provider_data_context
+from llama_stack.providers.remote.inference.groq.config import GroqConfig
+from llama_stack.providers.remote.inference.groq.groq import GroqInferenceAdapter
+from llama_stack.providers.remote.inference.openai.config import OpenAIConfig
+from llama_stack.providers.remote.inference.openai.openai import OpenAIInferenceAdapter
+from llama_stack.providers.remote.inference.together.config import TogetherImplConfig
+from llama_stack.providers.remote.inference.together.together import TogetherInferenceAdapter
+
+
+def test_groq_provider_openai_client_caching():
+    """Each request's provider-data API key must reach the Groq OpenAI client, never a cached one."""
+
+    adapter = GroqInferenceAdapter(GroqConfig())
+
+    # Stub the provider spec and point its provider_data_validator at the Groq validator path.
+    spec = MagicMock()
+    spec.provider_data_validator = "llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator"
+    adapter.__provider_spec__ = spec
+
+    key_field = adapter.provider_data_api_key_field
+    for expected_key in ("test1", "test2"):
+        headers = {"x-llamastack-provider-data": json.dumps({key_field: expected_key})}
+        with request_provider_data_context(headers):
+            # A client obtained inside this request context has to carry this request's key.
+            client = adapter._get_openai_client()
+            assert client.api_key == expected_key
+
+
+def test_openai_provider_openai_client_caching():
+    """Each request's provider-data API key must reach the OpenAI client, never a cached one."""
+
+    adapter = OpenAIInferenceAdapter(OpenAIConfig())
+
+    # Stub the provider spec and point its provider_data_validator at the OpenAI validator path.
+    spec = MagicMock()
+    spec.provider_data_validator = "llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator"
+    adapter.__provider_spec__ = spec
+
+    key_field = adapter.provider_data_api_key_field
+    for expected_key in ("test1", "test2"):
+        headers = {"x-llamastack-provider-data": json.dumps({key_field: expected_key})}
+        with request_provider_data_context(headers):
+            # A client obtained inside this request context has to carry this request's key.
+            client = adapter._get_openai_client()
+            assert client.api_key == expected_key
+
+
+def test_together_provider_openai_client_caching():
+    """Each request's Together API key must reach both the Together and OpenAI clients, never a cached one."""
+
+    adapter = TogetherInferenceAdapter(TogetherImplConfig())
+
+    # Stub the provider spec and point its provider_data_validator at the Together validator path.
+    spec = MagicMock()
+    spec.provider_data_validator = "llama_stack.providers.remote.inference.together.TogetherProviderDataValidator"
+    adapter.__provider_spec__ = spec
+
+    for expected_key in ("test1", "test2"):
+        headers = {"x-llamastack-provider-data": json.dumps({"together_api_key": expected_key})}
+        with request_provider_data_context(headers):
+            # Check the Together client first, then the OpenAI-compatible client, within one context.
+            sdk_client = adapter._get_client()
+            assert sdk_client.client.api_key == expected_key
+            assert adapter._get_openai_client().api_key == expected_key
diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py
index eaa9b40da..5c2ad03ab 100644
--- a/tests/unit/providers/inference/test_remote_vllm.py
+++ b/tests/unit/providers/inference/test_remote_vllm.py
@@ -14,7 +14,6 @@ from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
-import pytest_asyncio
 from openai.types.chat.chat_completion_chunk import (
     ChatCompletionChunk as OpenAIChatCompletionChunk,
 )
@@ -103,7 +102,7 @@ def mock_openai_models_list():
         yield mock_list
 
 
-@pytest_asyncio.fixture(scope="module")
+@pytest.fixture(scope="module")
 async def vllm_inference_adapter():
     config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
     inference_adapter = VLLMInferenceAdapter(config)
@@ -112,7 +111,6 @@ async def vllm_inference_adapter():
     return inference_adapter
 
 
-@pytest.mark.asyncio
 async def test_register_model_checks_vllm(mock_openai_models_list, vllm_inference_adapter):
     async def mock_openai_models():
         yield OpenAIModel(id="foo", created=1, object="model", owned_by="test")
@@ -125,7 +123,6 @@ async def test_register_model_checks_vllm(mock_openai_models_list, vllm_inferenc
     mock_openai_models_list.assert_called()
 
 
-@pytest.mark.asyncio
 async def test_old_vllm_tool_choice(vllm_inference_adapter):
     """
     Test that we set tool_choice to none when no tools are in use
@@ -149,7 +146,6 @@ async def test_old_vllm_tool_choice(vllm_inference_adapter):
         assert request.tool_config.tool_choice == ToolChoice.none
 
 
-@pytest.mark.asyncio
 async def test_tool_call_response(vllm_inference_adapter):
     """Verify that tool call arguments from a CompletionMessage are correctly converted
     into the expected JSON format."""
@@ -192,7 +188,6 @@ async def test_tool_call_response(vllm_inference_adapter):
         ]
 
 
-@pytest.mark.asyncio
 async def test_tool_call_delta_empty_tool_call_buf():
     """
     Test that we don't generate extra chunks when processing a
@@ -222,7 +217,6 @@ async def test_tool_call_delta_empty_tool_call_buf():
     assert chunks[1].event.stop_reason == StopReason.end_of_turn
 
 
-@pytest.mark.asyncio
 async def test_tool_call_delta_streaming_arguments_dict():
     async def mock_stream():
         mock_chunk_1 = OpenAIChatCompletionChunk(
@@ -297,7 +291,6 @@ async def test_tool_call_delta_streaming_arguments_dict():
     assert chunks[2].event.event_type.value == "complete"
 
 
-@pytest.mark.asyncio
 async def test_multiple_tool_calls():
     async def mock_stream():
         mock_chunk_1 = OpenAIChatCompletionChunk(
@@ -376,7 +369,6 @@ async def test_multiple_tool_calls():
     assert chunks[3].event.event_type.value == "complete"
 
 
-@pytest.mark.asyncio
 async def test_process_vllm_chat_completion_stream_response_no_choices():
     """
     Test that we don't error out when vLLM returns no choices for a
@@ -401,6 +393,7 @@ async def test_process_vllm_chat_completion_stream_response_no_choices():
     assert chunks[0].event.event_type.value == "start"
 
 
+@pytest.mark.allow_network
 def test_chat_completion_doesnt_block_event_loop(caplog):
     loop = asyncio.new_event_loop()
     loop.set_debug(True)
@@ -453,7 +446,6 @@ def test_chat_completion_doesnt_block_event_loop(caplog):
     assert not asyncio_warnings
 
 
-@pytest.mark.asyncio
 async def test_get_params_empty_tools(vllm_inference_adapter):
     request = ChatCompletionRequest(
         tools=[],
@@ -464,7 +456,6 @@ async def test_get_params_empty_tools(vllm_inference_adapter):
     assert "tools" not in params
 
 
-@pytest.mark.asyncio
 async def test_process_vllm_chat_completion_stream_response_tool_call_args_last_chunk():
     """
     Tests the edge case where the model returns the arguments for the tool call in the same chunk that
@@ -543,7 +534,6 @@ async def test_process_vllm_chat_completion_stream_response_tool_call_args_last_
     assert chunks[-2].event.delta.tool_call.arguments == mock_tool_arguments
 
 
-@pytest.mark.asyncio
 async def test_process_vllm_chat_completion_stream_response_no_finish_reason():
     """
     Tests the edge case where the model requests a tool call and stays idle without explicitly providing the
@@ -596,7 +586,6 @@ async def test_process_vllm_chat_completion_stream_response_no_finish_reason():
     assert chunks[-2].event.delta.tool_call.arguments == mock_tool_arguments
 
 
-@pytest.mark.asyncio
 async def test_process_vllm_chat_completion_stream_response_tool_without_args():
     """
     Tests the edge case where no arguments are provided for the tool call.
@@ -645,7 +634,6 @@ async def test_process_vllm_chat_completion_stream_response_tool_without_args():
     assert chunks[-2].event.delta.tool_call.arguments == {}
 
 
-@pytest.mark.asyncio
 async def test_health_status_success(vllm_inference_adapter):
     """
     Test the health method of VLLM InferenceAdapter when the connection is successful.
@@ -679,7 +667,6 @@ async def test_health_status_success(vllm_inference_adapter):
         mock_models.list.assert_called_once()
 
 
-@pytest.mark.asyncio
 async def test_health_status_failure(vllm_inference_adapter):
     """
     Test the health method of VLLM InferenceAdapter when the connection fails.
diff --git a/tests/unit/providers/utils/inference/test_openai_compat.py b/tests/unit/providers/utils/inference/test_openai_compat.py
index 3598e4810..f57f6c9b3 100644
--- a/tests/unit/providers/utils/inference/test_openai_compat.py
+++ b/tests/unit/providers/utils/inference/test_openai_compat.py
@@ -4,7 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-import pytest
 
 from llama_stack.apis.common.content_types import TextContentItem
 from llama_stack.apis.inference import (
@@ -23,7 +22,6 @@ from llama_stack.providers.utils.inference.openai_compat import (
 )
 
 
-@pytest.mark.asyncio
 async def test_convert_message_to_openai_dict():
     message = UserMessage(content=[TextContentItem(text="Hello, world!")], role="user")
     assert await convert_message_to_openai_dict(message) == {
@@ -33,7 +31,6 @@ async def test_convert_message_to_openai_dict():
 
 
 # Test convert_message_to_openai_dict with a tool call
-@pytest.mark.asyncio
 async def test_convert_message_to_openai_dict_with_tool_call():
     message = CompletionMessage(
         content="",
@@ -54,7 +51,6 @@ async def test_convert_message_to_openai_dict_with_tool_call():
     }
 
 
-@pytest.mark.asyncio
 async def test_convert_message_to_openai_dict_with_builtin_tool_call():
     message = CompletionMessage(
         content="",
@@ -80,7 +76,6 @@ async def test_convert_message_to_openai_dict_with_builtin_tool_call():
     }
 
 
-@pytest.mark.asyncio
 async def test_openai_messages_to_messages_with_content_str():
     openai_messages = [
         OpenAISystemMessageParam(content="system message"),
@@ -98,7 +93,6 @@ async def test_openai_messages_to_messages_with_content_str():
     assert llama_messages[2].content == "assistant message"
 
 
-@pytest.mark.asyncio
 async def test_openai_messages_to_messages_with_content_list():
     openai_messages = [
         OpenAISystemMessageParam(content=[OpenAIChatCompletionContentPartTextParam(text="system message")]),
diff --git a/tests/unit/providers/utils/memory/test_vector_store.py b/tests/unit/providers/utils/memory/test_vector_store.py
index 220c21994..90b229262 100644
--- a/tests/unit/providers/utils/memory/test_vector_store.py
+++ b/tests/unit/providers/utils/memory/test_vector_store.py
@@ -13,7 +13,6 @@ from llama_stack.apis.tools import RAGDocument
 from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, content_from_doc
 
 
-@pytest.mark.asyncio
 async def test_content_from_doc_with_url():
     """Test extracting content from RAGDocument with URL content."""
     mock_url = URL(uri="https://example.com")
@@ -33,7 +32,6 @@ async def test_content_from_doc_with_url():
         mock_instance.get.assert_called_once_with(mock_url.uri)
 
 
-@pytest.mark.asyncio
 async def test_content_from_doc_with_pdf_url():
     """Test extracting content from RAGDocument with URL pointing to a PDF."""
     mock_url = URL(uri="https://example.com/document.pdf")
@@ -58,7 +56,6 @@ async def test_content_from_doc_with_pdf_url():
         mock_parse_pdf.assert_called_once_with(b"PDF binary data")
 
 
-@pytest.mark.asyncio
 async def test_content_from_doc_with_data_url():
     """Test extracting content from RAGDocument with data URL content."""
     data_url = "data:text/plain;base64,SGVsbG8gV29ybGQ="  # "Hello World" base64 encoded
@@ -74,7 +71,6 @@ async def test_content_from_doc_with_data_url():
         mock_content_from_data.assert_called_once_with(data_url)
 
 
-@pytest.mark.asyncio
 async def test_content_from_doc_with_string():
     """Test extracting content from RAGDocument with string content."""
     content_string = "This is plain text content"
@@ -85,7 +81,6 @@ async def test_content_from_doc_with_string():
     assert result == content_string
 
 
-@pytest.mark.asyncio
 async def test_content_from_doc_with_string_url():
     """Test extracting content from RAGDocument with string URL content."""
     url_string = "https://example.com"
@@ -105,7 +100,6 @@ async def test_content_from_doc_with_string_url():
         mock_instance.get.assert_called_once_with(url_string)
 
 
-@pytest.mark.asyncio
 async def test_content_from_doc_with_string_pdf_url():
     """Test extracting content from RAGDocument with string URL pointing to a PDF."""
     url_string = "https://example.com/document.pdf"
@@ -130,7 +124,6 @@ async def test_content_from_doc_with_string_pdf_url():
         mock_parse_pdf.assert_called_once_with(b"PDF binary data")
 
 
-@pytest.mark.asyncio
 async def test_content_from_doc_with_interleaved_content():
     """Test extracting content from RAGDocument with InterleavedContent (the new case added in the commit)."""
     interleaved_content = [TextContentItem(text="First item"), TextContentItem(text="Second item")]
diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py
index 10fa1e075..e11f95d49 100644
--- a/tests/unit/providers/utils/test_model_registry.py
+++ b/tests/unit/providers/utils/test_model_registry.py
@@ -87,18 +87,15 @@ def helper(known_provider_model: ProviderModelEntry, known_provider_model2: Prov
     return ModelRegistryHelper([known_provider_model, known_provider_model2])
 
 
-@pytest.mark.asyncio
 async def test_lookup_unknown_model(helper: ModelRegistryHelper, unknown_model: Model) -> None:
     assert helper.get_provider_model_id(unknown_model.model_id) is None
 
 
-@pytest.mark.asyncio
 async def test_register_unknown_provider_model(helper: ModelRegistryHelper, unknown_model: Model) -> None:
     with pytest.raises(ValueError):
         await helper.register_model(unknown_model)
 
 
-@pytest.mark.asyncio
 async def test_register_model(helper: ModelRegistryHelper, known_model: Model) -> None:
     model = Model(
         provider_id=known_model.provider_id,
@@ -110,7 +107,6 @@ async def test_register_model(helper: ModelRegistryHelper, known_model: Model) -
     assert helper.get_provider_model_id(model.model_id) == model.provider_resource_id
 
 
-@pytest.mark.asyncio
 async def test_register_model_from_alias(helper: ModelRegistryHelper, known_model: Model) -> None:
     model = Model(
         provider_id=known_model.provider_id,
@@ -122,13 +118,11 @@ async def test_register_model_from_alias(helper: ModelRegistryHelper, known_mode
     assert helper.get_provider_model_id(model.model_id) == known_model.provider_resource_id
 
 
-@pytest.mark.asyncio
 async def test_register_model_existing(helper: ModelRegistryHelper, known_model: Model) -> None:
     await helper.register_model(known_model)
     assert helper.get_provider_model_id(known_model.model_id) == known_model.provider_resource_id
 
 
-@pytest.mark.asyncio
 async def test_register_model_existing_different(
     helper: ModelRegistryHelper, known_model: Model, known_model2: Model
 ) -> None:
@@ -137,7 +131,6 @@ async def test_register_model_existing_different(
         await helper.register_model(known_model)
 
 
-@pytest.mark.asyncio
 async def test_unregister_model(helper: ModelRegistryHelper, known_model: Model) -> None:
     await helper.register_model(known_model)  # duplicate entry
     assert helper.get_provider_model_id(known_model.model_id) == known_model.provider_model_id
@@ -145,18 +138,15 @@ async def test_unregister_model(helper: ModelRegistryHelper, known_model: Model)
     assert helper.get_provider_model_id(known_model.model_id) is None
 
 
-@pytest.mark.asyncio
 async def test_unregister_unknown_model(helper: ModelRegistryHelper, unknown_model: Model) -> None:
     with pytest.raises(ValueError):
         await helper.unregister_model(unknown_model.model_id)
 
 
-@pytest.mark.asyncio
 async def test_register_model_during_init(helper: ModelRegistryHelper, known_model: Model) -> None:
     assert helper.get_provider_model_id(known_model.provider_resource_id) == known_model.provider_model_id
 
 
-@pytest.mark.asyncio
 async def test_unregister_model_during_init(helper: ModelRegistryHelper, known_model: Model) -> None:
     assert helper.get_provider_model_id(known_model.provider_resource_id) == known_model.provider_model_id
     await helper.unregister_model(known_model.provider_resource_id)
diff --git a/tests/unit/providers/utils/test_scheduler.py b/tests/unit/providers/utils/test_scheduler.py
index 25b4935de..e5ee74bfa 100644
--- a/tests/unit/providers/utils/test_scheduler.py
+++ b/tests/unit/providers/utils/test_scheduler.py
@@ -11,7 +11,6 @@ import pytest
 from llama_stack.providers.utils.scheduler import JobStatus, Scheduler
 
 
-@pytest.mark.asyncio
 async def test_scheduler_unknown_backend():
     with pytest.raises(ValueError):
         Scheduler(backend="unknown")
@@ -26,7 +25,6 @@ async def wait_for_job_completed(sched: Scheduler, job_id: str) -> None:
     raise TimeoutError(f"Job {job_id} did not complete in time.")
 
 
-@pytest.mark.asyncio
 async def test_scheduler_naive():
     sched = Scheduler()
 
@@ -87,7 +85,6 @@ async def test_scheduler_naive():
     assert job.logs[0][0] < job.logs[1][0]
 
 
-@pytest.mark.asyncio
 async def test_scheduler_naive_handler_raises():
     sched = Scheduler()
 
diff --git a/tests/unit/providers/vector_io/test_faiss.py b/tests/unit/providers/vector_io/test_faiss.py
index 8348b84e3..90108d7a0 100644
--- a/tests/unit/providers/vector_io/test_faiss.py
+++ b/tests/unit/providers/vector_io/test_faiss.py
@@ -9,7 +9,6 @@ from unittest.mock import AsyncMock, MagicMock, patch
 
 import numpy as np
 import pytest
-import pytest_asyncio
 
 from llama_stack.apis.files import Files
 from llama_stack.apis.inference import EmbeddingsResponse, Inference
@@ -91,13 +90,13 @@ def faiss_config():
     return config
 
 
-@pytest_asyncio.fixture
+@pytest.fixture
 async def faiss_index(embedding_dimension):
     index = await FaissIndex.create(dimension=embedding_dimension)
     yield index
 
 
-@pytest_asyncio.fixture
+@pytest.fixture
 async def faiss_adapter(faiss_config, mock_inference_api, mock_files_api) -> FaissVectorIOAdapter:
     # Create the adapter
     adapter = FaissVectorIOAdapter(config=faiss_config, inference_api=mock_inference_api, files_api=mock_files_api)
@@ -113,7 +112,6 @@ async def faiss_adapter(faiss_config, mock_inference_api, mock_files_api) -> Fai
         yield adapter
 
 
-@pytest.mark.asyncio
 async def test_faiss_query_vector_returns_infinity_when_query_and_embedding_are_identical(
     faiss_index, sample_chunks, sample_embeddings, embedding_dimension
 ):
@@ -136,7 +134,6 @@ async def test_faiss_query_vector_returns_infinity_when_query_and_embedding_are_
         assert response.chunks[1] == sample_chunks[1]
 
 
-@pytest.mark.asyncio
 async def test_health_success():
     """Test that the health check returns OK status when faiss is working correctly."""
     # Create a fresh instance of FaissVectorIOAdapter for testing
@@ -160,7 +157,6 @@ async def test_health_success():
         mock_index_flat.assert_called_once_with(128)  # VECTOR_DIMENSION is 128
 
 
-@pytest.mark.asyncio
 async def test_health_failure():
     """Test that the health check returns ERROR status when faiss encounters an error."""
     # Create a fresh instance of FaissVectorIOAdapter for testing
diff --git a/tests/unit/providers/vector_io/test_qdrant.py b/tests/unit/providers/vector_io/test_qdrant.py
index 6902c8850..d3ffe711c 100644
--- a/tests/unit/providers/vector_io/test_qdrant.py
+++ b/tests/unit/providers/vector_io/test_qdrant.py
@@ -10,7 +10,6 @@ from typing import Any
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
-import pytest_asyncio
 
 from llama_stack.apis.inference import EmbeddingsResponse, Inference
 from llama_stack.apis.vector_io import (
@@ -68,7 +67,7 @@ def mock_api_service(sample_embeddings):
     return mock_api_service
 
 
-@pytest_asyncio.fixture
+@pytest.fixture
 async def qdrant_adapter(qdrant_config, mock_vector_db_store, mock_api_service, loop) -> QdrantVectorIOAdapter:
     adapter = QdrantVectorIOAdapter(config=qdrant_config, inference_api=mock_api_service)
     adapter.vector_db_store = mock_vector_db_store
@@ -80,7 +79,6 @@ async def qdrant_adapter(qdrant_config, mock_vector_db_store, mock_api_service,
 __QUERY = "Sample query"
 
 
-@pytest.mark.asyncio
 @pytest.mark.parametrize("max_query_chunks, expected_chunks", [(2, 2), (100, 60)])
 async def test_qdrant_adapter_returns_expected_chunks(
     qdrant_adapter: QdrantVectorIOAdapter,
@@ -111,7 +109,6 @@ def _prepare_for_json(value: Any) -> str:
 
 
 @patch("llama_stack.providers.utils.telemetry.trace_protocol._prepare_for_json", new=_prepare_for_json)
-@pytest.mark.asyncio
 async def test_qdrant_register_and_unregister_vector_db(
     qdrant_adapter: QdrantVectorIOAdapter,
     mock_vector_db,
diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py
index 8579c31bb..a61eeeeca 100644
--- a/tests/unit/providers/vector_io/test_sqlite_vec.py
+++ b/tests/unit/providers/vector_io/test_sqlite_vec.py
@@ -8,7 +8,6 @@ import asyncio
 
 import numpy as np
 import pytest
-import pytest_asyncio
 
 from llama_stack.apis.vector_io import Chunk, QueryChunksResponse
 from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import (
@@ -34,7 +33,7 @@ def loop():
     return asyncio.new_event_loop()
 
 
-@pytest_asyncio.fixture
+@pytest.fixture
 async def sqlite_vec_index(embedding_dimension, tmp_path_factory):
     temp_dir = tmp_path_factory.getbasetemp()
     db_path = str(temp_dir / "test_sqlite.db")
@@ -43,14 +42,12 @@ async def sqlite_vec_index(embedding_dimension, tmp_path_factory):
     await index.delete()
 
 
-@pytest.mark.asyncio
 async def test_query_chunk_metadata(sqlite_vec_index, sample_chunks_with_metadata, sample_embeddings_with_metadata):
     await sqlite_vec_index.add_chunks(sample_chunks_with_metadata, sample_embeddings_with_metadata)
     response = await sqlite_vec_index.query_vector(sample_embeddings_with_metadata[-1], k=2, score_threshold=0.0)
     assert response.chunks[0].chunk_metadata == sample_chunks_with_metadata[-1].chunk_metadata
 
 
-@pytest.mark.asyncio
 async def test_query_chunks_full_text_search(sqlite_vec_index, sample_chunks, sample_embeddings):
     await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
     query_string = "Sentence 5"
@@ -68,7 +65,6 @@ async def test_query_chunks_full_text_search(sqlite_vec_index, sample_chunks, sa
     assert len(response_no_results.chunks) == 0, f"Expected 0 results, but got {len(response_no_results.chunks)}"
 
 
-@pytest.mark.asyncio
 async def test_query_chunks_hybrid(sqlite_vec_index, sample_chunks, sample_embeddings):
     await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
 
@@ -90,7 +86,6 @@ async def test_query_chunks_hybrid(sqlite_vec_index, sample_chunks, sample_embed
     assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1))
 
 
-@pytest.mark.asyncio
 async def test_query_chunks_full_text_search_k_greater_than_results(sqlite_vec_index, sample_chunks, sample_embeddings):
     # Re-initialize with a clean index
     await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
@@ -103,7 +98,6 @@ async def test_query_chunks_full_text_search_k_greater_than_results(sqlite_vec_i
     assert any("Sentence 1 from document 0" in chunk.content for chunk in response.chunks), "Expected chunk not found"
 
 
-@pytest.mark.asyncio
 async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks, embedding_dimension):
     """Test that chunk IDs do not conflict across batches when inserting chunks."""
     # Reduce batch size to force multiple batches for same document
@@ -134,7 +128,6 @@ async def sqlite_vec_adapter(sqlite_connection):
     await adapter.shutdown()
 
 
-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_no_keyword_matches(sqlite_vec_index, sample_chunks, sample_embeddings):
     """Test hybrid search when keyword search returns no matches - should still return vector results."""
     await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
@@ -163,7 +156,6 @@ async def test_query_chunks_hybrid_no_keyword_matches(sqlite_vec_index, sample_c
     assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1))
 
 
-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_score_threshold(sqlite_vec_index, sample_chunks, sample_embeddings):
     """Test hybrid search with a high score threshold."""
     await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
@@ -185,7 +177,6 @@ async def test_query_chunks_hybrid_score_threshold(sqlite_vec_index, sample_chun
     assert len(response.chunks) == 0
 
 
-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_different_embedding(
     sqlite_vec_index, sample_chunks, sample_embeddings, embedding_dimension
 ):
@@ -211,7 +202,6 @@ async def test_query_chunks_hybrid_different_embedding(
     assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1))
 
 
-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_rrf_ranking(sqlite_vec_index, sample_chunks, sample_embeddings):
     """Test that RRF properly combines rankings when documents appear in both search methods."""
     await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
@@ -236,7 +226,6 @@ async def test_query_chunks_hybrid_rrf_ranking(sqlite_vec_index, sample_chunks,
     assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1))
 
 
-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_score_selection(sqlite_vec_index, sample_chunks, sample_embeddings):
     await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
 
@@ -284,7 +273,6 @@ async def test_query_chunks_hybrid_score_selection(sqlite_vec_index, sample_chun
     assert response.scores[0] == pytest.approx(2.0 / 61.0, rel=1e-6)  # Should behave like RRF
 
 
-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_mixed_results(sqlite_vec_index, sample_chunks, sample_embeddings):
     """Test hybrid search with documents that appear in only one search method."""
     await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
@@ -313,7 +301,6 @@ async def test_query_chunks_hybrid_mixed_results(sqlite_vec_index, sample_chunks
     assert "document-2" in doc_ids  # From keyword search
 
 
-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_weighted_reranker_parametrization(
     sqlite_vec_index, sample_chunks, sample_embeddings
 ):
@@ -369,7 +356,6 @@ async def test_query_chunks_hybrid_weighted_reranker_parametrization(
     )
 
 
-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_rrf_impact_factor(sqlite_vec_index, sample_chunks, sample_embeddings):
     """Test RRFReRanker with different impact factors."""
     await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
@@ -401,7 +387,6 @@ async def test_query_chunks_hybrid_rrf_impact_factor(sqlite_vec_index, sample_ch
     assert response.scores[0] == pytest.approx(2.0 / 101.0, rel=1e-6)
 
 
-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_edge_cases(sqlite_vec_index, sample_chunks, sample_embeddings):
     await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings)
 
@@ -445,7 +430,6 @@ async def test_query_chunks_hybrid_edge_cases(sqlite_vec_index, sample_chunks, s
     assert len(response.chunks) <= 100
 
 
-@pytest.mark.asyncio
 async def test_query_chunks_hybrid_tie_breaking(
     sqlite_vec_index, sample_embeddings, embedding_dimension, tmp_path_factory
 ):
diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
index 0ad98405e..bf7663d2e 100644
--- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
+++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py
@@ -25,12 +25,10 @@ from llama_stack.providers.remote.vector_io.milvus.milvus import VECTOR_DBS_PREF
 # -v -s --tb=short --disable-warnings --asyncio-mode=auto
 
 
-@pytest.mark.asyncio
 async def test_initialize_index(vector_index):
     await vector_index.initialize()
 
 
-@pytest.mark.asyncio
 async def test_add_chunks_query_vector(vector_index, sample_chunks, sample_embeddings):
     vector_index.delete()
     vector_index.initialize()
@@ -40,7 +38,6 @@ async def test_add_chunks_query_vector(vector_index, sample_chunks, sample_embed
     vector_index.delete()
 
 
-@pytest.mark.asyncio
 async def test_chunk_id_conflict(vector_index, sample_chunks, embedding_dimension):
     embeddings = np.random.rand(len(sample_chunks), embedding_dimension).astype(np.float32)
     await vector_index.add_chunks(sample_chunks, embeddings)
@@ -54,7 +51,6 @@ async def test_chunk_id_conflict(vector_index, sample_chunks, embedding_dimensio
     assert len(contents) == len(set(contents))
 
 
-@pytest.mark.asyncio
 async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter):
     key = f"{VECTOR_DBS_PREFIX}db1"
     dummy = VectorDB(
@@ -65,7 +61,6 @@ async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter):
     await vector_io_adapter.initialize()
 
 
-@pytest.mark.asyncio
 async def test_persistence_across_adapter_restarts(vector_io_adapter):
     await vector_io_adapter.initialize()
     dummy = VectorDB(
@@ -79,7 +74,6 @@ async def test_persistence_across_adapter_restarts(vector_io_adapter):
     await vector_io_adapter.shutdown()
 
 
-@pytest.mark.asyncio
 async def test_register_and_unregister_vector_db(vector_io_adapter):
     unique_id = f"foo_db_{np.random.randint(1e6)}"
     dummy = VectorDB(
@@ -92,14 +86,12 @@ async def test_register_and_unregister_vector_db(vector_io_adapter):
     assert dummy.identifier not in vector_io_adapter.cache
 
 
-@pytest.mark.asyncio
 async def test_query_unregistered_raises(vector_io_adapter):
     fake_emb = np.zeros(8, dtype=np.float32)
     with pytest.raises(ValueError):
         await vector_io_adapter.query_chunks("no_such_db", fake_emb)
 
 
-@pytest.mark.asyncio
 async def test_insert_chunks_calls_underlying_index(vector_io_adapter):
     fake_index = AsyncMock()
     vector_io_adapter.cache["db1"] = fake_index
@@ -110,7 +102,6 @@ async def test_insert_chunks_calls_underlying_index(vector_io_adapter):
     fake_index.insert_chunks.assert_awaited_once_with(chunks)
 
 
-@pytest.mark.asyncio
 async def test_insert_chunks_missing_db_raises(vector_io_adapter):
     vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None)
 
@@ -118,7 +109,6 @@ async def test_insert_chunks_missing_db_raises(vector_io_adapter):
         await vector_io_adapter.insert_chunks("db_not_exist", [])
 
 
-@pytest.mark.asyncio
 async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter):
     expected = QueryChunksResponse(chunks=[Chunk(content="c1")], scores=[0.1])
     fake_index = AsyncMock(query_chunks=AsyncMock(return_value=expected))
@@ -130,7 +120,6 @@ async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter
     assert response is expected
 
 
-@pytest.mark.asyncio
 async def test_query_chunks_missing_db_raises(vector_io_adapter):
     vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None)
 
@@ -138,7 +127,6 @@ async def test_query_chunks_missing_db_raises(vector_io_adapter):
         await vector_io_adapter.query_chunks("db_missing", "q", None)
 
 
-@pytest.mark.asyncio
 async def test_save_openai_vector_store(vector_io_adapter):
     store_id = "vs_1234"
     openai_vector_store = {
@@ -155,7 +143,6 @@ async def test_save_openai_vector_store(vector_io_adapter):
     assert vector_io_adapter.openai_vector_stores[openai_vector_store["id"]] == openai_vector_store
 
 
-@pytest.mark.asyncio
 async def test_update_openai_vector_store(vector_io_adapter):
     store_id = "vs_1234"
     openai_vector_store = {
@@ -172,7 +159,6 @@ async def test_update_openai_vector_store(vector_io_adapter):
     assert vector_io_adapter.openai_vector_stores[openai_vector_store["id"]] == openai_vector_store
 
 
-@pytest.mark.asyncio
 async def test_delete_openai_vector_store(vector_io_adapter):
     store_id = "vs_1234"
     openai_vector_store = {
@@ -188,7 +174,6 @@ async def test_delete_openai_vector_store(vector_io_adapter):
     assert openai_vector_store["id"] not in vector_io_adapter.openai_vector_stores
 
 
-@pytest.mark.asyncio
 async def test_load_openai_vector_stores(vector_io_adapter):
     store_id = "vs_1234"
     openai_vector_store = {
@@ -204,7 +189,6 @@ async def test_load_openai_vector_stores(vector_io_adapter):
     assert loaded_stores[store_id] == openai_vector_store
 
 
-@pytest.mark.asyncio
 async def test_save_openai_vector_store_file(vector_io_adapter, tmp_path_factory):
     store_id = "vs_1234"
     file_id = "file_1234"
@@ -226,7 +210,6 @@ async def test_save_openai_vector_store_file(vector_io_adapter, tmp_path_factory
     await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, file_contents)
 
 
-@pytest.mark.asyncio
 async def test_update_openai_vector_store_file(vector_io_adapter, tmp_path_factory):
     store_id = "vs_1234"
     file_id = "file_1234"
@@ -260,7 +243,6 @@ async def test_update_openai_vector_store_file(vector_io_adapter, tmp_path_facto
     assert loaded_contents != file_info
 
 
-@pytest.mark.asyncio
 async def test_load_openai_vector_store_file_contents(vector_io_adapter, tmp_path_factory):
     store_id = "vs_1234"
     file_id = "file_1234"
@@ -284,7 +266,6 @@ async def test_load_openai_vector_store_file_contents(vector_io_adapter, tmp_pat
     assert loaded_contents == file_contents
 
 
-@pytest.mark.asyncio
 async def test_delete_openai_vector_store_file_from_storage(vector_io_adapter, tmp_path_factory):
     store_id = "vs_1234"
     file_id = "file_1234"
diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py
index d2dd1783b..b2baa744a 100644
--- a/tests/unit/rag/test_rag_query.py
+++ b/tests/unit/rag/test_rag_query.py
@@ -17,13 +17,11 @@ from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRunti
 
 
 class TestRagQuery:
-    @pytest.mark.asyncio
     async def test_query_raises_on_empty_vector_db_ids(self):
         rag_tool = MemoryToolRuntimeImpl(config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock())
         with pytest.raises(ValueError):
             await rag_tool.query(content=MagicMock(), vector_db_ids=[])
 
-    @pytest.mark.asyncio
     async def test_query_chunk_metadata_handling(self):
         rag_tool = MemoryToolRuntimeImpl(config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock())
         content = "test query content"
diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py
index 9d6b9ee67..919f97ba7 100644
--- a/tests/unit/rag/test_vector_store.py
+++ b/tests/unit/rag/test_vector_store.py
@@ -112,7 +112,6 @@ class TestValidateEmbedding:
 
 
 class TestVectorStore:
-    @pytest.mark.asyncio
     async def test_returns_content_from_pdf_data_uri(self):
         data_uri = data_url_from_file(DUMMY_PDF_PATH)
         doc = RAGDocument(
@@ -124,7 +123,7 @@ class TestVectorStore:
         content = await content_from_doc(doc)
         assert content in DUMMY_PDF_TEXT_CHOICES
 
-    @pytest.mark.asyncio
+    @pytest.mark.allow_network
     async def test_downloads_pdf_and_returns_content(self):
         # Using GitHub to host the PDF file
         url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf"
@@ -137,7 +136,7 @@ class TestVectorStore:
         content = await content_from_doc(doc)
         assert content in DUMMY_PDF_TEXT_CHOICES
 
-    @pytest.mark.asyncio
+    @pytest.mark.allow_network
     async def test_downloads_pdf_and_returns_content_with_url_object(self):
         # Using GitHub to host the PDF file
         url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf"
@@ -204,7 +203,6 @@ class TestVectorStore:
 
 
 class TestVectorDBWithIndex:
-    @pytest.mark.asyncio
     async def test_insert_chunks_without_embeddings(self):
         mock_vector_db = MagicMock()
         mock_vector_db.embedding_model = "test-model without embeddings"
@@ -230,7 +228,6 @@ class TestVectorDBWithIndex:
         assert args[0] == chunks
         assert np.array_equal(args[1], np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))
 
-    @pytest.mark.asyncio
     async def test_insert_chunks_with_valid_embeddings(self):
         mock_vector_db = MagicMock()
         mock_vector_db.embedding_model = "test-model with embeddings"
@@ -255,7 +252,6 @@ class TestVectorDBWithIndex:
         assert args[0] == chunks
         assert np.array_equal(args[1], np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32))
 
-    @pytest.mark.asyncio
     async def test_insert_chunks_with_invalid_embeddings(self):
         mock_vector_db = MagicMock()
         mock_vector_db.embedding_dimension = 3
@@ -295,7 +291,6 @@ class TestVectorDBWithIndex:
         mock_inference_api.embeddings.assert_not_called()
         mock_index.add_chunks.assert_not_called()
 
-    @pytest.mark.asyncio
     async def test_insert_chunks_with_partially_precomputed_embeddings(self):
         mock_vector_db = MagicMock()
         mock_vector_db.embedding_model = "test-model with partial embeddings"
diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py
index 909581bb7..87fe18d54 100644
--- a/tests/unit/registry/test_registry.py
+++ b/tests/unit/registry/test_registry.py
@@ -38,14 +38,12 @@ def sample_model():
     )
 
 
-@pytest.mark.asyncio
 async def test_registry_initialization(disk_dist_registry):
     # Test empty registry
     result = await disk_dist_registry.get("nonexistent", "nonexistent")
     assert result is None
 
 
-@pytest.mark.asyncio
 async def test_basic_registration(disk_dist_registry, sample_vector_db, sample_model):
     print(f"Registering {sample_vector_db}")
     await disk_dist_registry.register(sample_vector_db)
@@ -64,7 +62,6 @@ async def test_basic_registration(disk_dist_registry, sample_vector_db, sample_m
     assert result_model.provider_id == sample_model.provider_id
 
 
-@pytest.mark.asyncio
 async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_db, sample_model):
     # First populate the disk registry
     disk_registry = DiskDistributionRegistry(sqlite_kvstore)
@@ -85,7 +82,6 @@ async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_db,
     assert result_vector_db.provider_id == sample_vector_db.provider_id
 
 
-@pytest.mark.asyncio
 async def test_cached_registry_updates(cached_disk_dist_registry):
     new_vector_db = VectorDB(
         identifier="test_vector_db_2",
@@ -112,7 +108,6 @@ async def test_cached_registry_updates(cached_disk_dist_registry):
     assert result_vector_db.provider_id == new_vector_db.provider_id
 
 
-@pytest.mark.asyncio
 async def test_duplicate_provider_registration(cached_disk_dist_registry):
     original_vector_db = VectorDB(
         identifier="test_vector_db_2",
@@ -137,7 +132,6 @@ async def test_duplicate_provider_registration(cached_disk_dist_registry):
     assert result.embedding_model == original_vector_db.embedding_model  # Original values preserved
 
 
-@pytest.mark.asyncio
 async def test_get_all_objects(cached_disk_dist_registry):
     # Create multiple test banks
     # Create multiple test banks
@@ -170,7 +164,6 @@ async def test_get_all_objects(cached_disk_dist_registry):
         assert stored_vector_db.embedding_dimension == original_vector_db.embedding_dimension
 
 
-@pytest.mark.asyncio
 async def test_parse_registry_values_error_handling(sqlite_kvstore):
     valid_db = VectorDB(
         identifier="valid_vector_db",
@@ -209,7 +202,6 @@ async def test_parse_registry_values_error_handling(sqlite_kvstore):
     assert invalid_obj is None
 
 
-@pytest.mark.asyncio
 async def test_cached_registry_error_handling(sqlite_kvstore):
     valid_db = VectorDB(
         identifier="valid_cached_db",
diff --git a/tests/unit/registry/test_registry_acl.py b/tests/unit/registry/test_registry_acl.py
index 48b3ac51b..6cfb20944 100644
--- a/tests/unit/registry/test_registry_acl.py
+++ b/tests/unit/registry/test_registry_acl.py
@@ -5,14 +5,11 @@
 # the root directory of this source tree.
 
 
-import pytest
-
 from llama_stack.apis.models import ModelType
 from llama_stack.distribution.datatypes import ModelWithOwner, User
 from llama_stack.distribution.store.registry import CachedDiskDistributionRegistry
 
 
-@pytest.mark.asyncio
 async def test_registry_cache_with_acl(cached_disk_dist_registry):
     model = ModelWithOwner(
         identifier="model-acl",
@@ -48,7 +45,6 @@ async def test_registry_cache_with_acl(cached_disk_dist_registry):
     assert new_model.owner.attributes["teams"] == ["ai-team"]
 
 
-@pytest.mark.asyncio
 async def test_registry_empty_acl(cached_disk_dist_registry):
     model = ModelWithOwner(
         identifier="model-empty-acl",
@@ -85,7 +81,6 @@ async def test_registry_empty_acl(cached_disk_dist_registry):
     assert len(all_models) == 2
 
 
-@pytest.mark.asyncio
 async def test_registry_serialization(cached_disk_dist_registry):
     attributes = {
         "roles": ["admin", "researcher"],
diff --git a/tests/unit/server/test_access_control.py b/tests/unit/server/test_access_control.py
index af03ddacb..fb9c6f95e 100644
--- a/tests/unit/server/test_access_control.py
+++ b/tests/unit/server/test_access_control.py
@@ -7,7 +7,6 @@
 from unittest.mock import MagicMock, Mock, patch
 
 import pytest
-import pytest_asyncio
 import yaml
 from pydantic import TypeAdapter, ValidationError
 
@@ -27,7 +26,7 @@ def _return_model(model):
     return model
 
 
-@pytest_asyncio.fixture
+@pytest.fixture
 async def test_setup(cached_disk_dist_registry):
     mock_inference = Mock()
     mock_inference.__provider_spec__ = MagicMock()
@@ -41,7 +40,6 @@ async def test_setup(cached_disk_dist_registry):
     yield cached_disk_dist_registry, routing_table
 
 
-@pytest.mark.asyncio
 @patch("llama_stack.distribution.routing_tables.common.get_authenticated_user")
 async def test_access_control_with_cache(mock_get_authenticated_user, test_setup):
     registry, routing_table = test_setup
@@ -106,7 +104,6 @@ async def test_access_control_with_cache(mock_get_authenticated_user, test_setup
         await routing_table.get_model("model-admin")
 
 
-@pytest.mark.asyncio
 @patch("llama_stack.distribution.routing_tables.common.get_authenticated_user")
 async def test_access_control_and_updates(mock_get_authenticated_user, test_setup):
     registry, routing_table = test_setup
@@ -145,7 +142,6 @@ async def test_access_control_and_updates(mock_get_authenticated_user, test_setu
     assert model.identifier == "model-updates"
 
 
-@pytest.mark.asyncio
 @patch("llama_stack.distribution.routing_tables.common.get_authenticated_user")
 async def test_access_control_empty_attributes(mock_get_authenticated_user, test_setup):
     registry, routing_table = test_setup
@@ -170,7 +166,6 @@ async def test_access_control_empty_attributes(mock_get_authenticated_user, test
     assert "model-empty-attrs" in model_ids
 
 
-@pytest.mark.asyncio
 @patch("llama_stack.distribution.routing_tables.common.get_authenticated_user")
 async def test_no_user_attributes(mock_get_authenticated_user, test_setup):
     registry, routing_table = test_setup
@@ -201,7 +196,6 @@ async def test_no_user_attributes(mock_get_authenticated_user, test_setup):
     assert all_models.data[0].identifier == "model-public-2"
 
 
-@pytest.mark.asyncio
 @patch("llama_stack.distribution.routing_tables.common.get_authenticated_user")
 async def test_automatic_access_attributes(mock_get_authenticated_user, test_setup):
     """Test that newly created resources inherit access attributes from their creator."""
@@ -246,7 +240,7 @@ async def test_automatic_access_attributes(mock_get_authenticated_user, test_set
     assert model.identifier == "auto-access-model"
 
 
-@pytest_asyncio.fixture
+@pytest.fixture
 async def test_setup_with_access_policy(cached_disk_dist_registry):
     mock_inference = Mock()
     mock_inference.__provider_spec__ = MagicMock()
@@ -281,7 +275,6 @@ async def test_setup_with_access_policy(cached_disk_dist_registry):
     yield routing_table
 
 
-@pytest.mark.asyncio
 @patch("llama_stack.distribution.routing_tables.common.get_authenticated_user")
 async def test_access_policy(mock_get_authenticated_user, test_setup_with_access_policy):
     routing_table = test_setup_with_access_policy
diff --git a/tests/unit/server/test_auth.py b/tests/unit/server/test_auth.py
index 39d6af1c8..7012a7f17 100644
--- a/tests/unit/server/test_auth.py
+++ b/tests/unit/server/test_auth.py
@@ -202,7 +202,6 @@ def test_http_auth_request_payload(http_client, valid_api_key, mock_auth_endpoin
         assert "param2" in payload["request"]["params"]
 
 
-@pytest.mark.asyncio
 async def test_http_middleware_with_access_attributes(mock_http_middleware, mock_scope):
     """Test HTTP middleware behavior with access attributes"""
     middleware, mock_app = mock_http_middleware
diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py
index acf4da0a3..a348590b1 100644
--- a/tests/unit/server/test_resolver.py
+++ b/tests/unit/server/test_resolver.py
@@ -9,7 +9,6 @@ import sys
 from typing import Any, Protocol
 from unittest.mock import AsyncMock, MagicMock
 
-import pytest
 from pydantic import BaseModel, Field
 
 from llama_stack.apis.inference import Inference
@@ -66,7 +65,6 @@ class SampleImpl:
         pass
 
 
-@pytest.mark.asyncio
 async def test_resolve_impls_basic():
     # Create a real provider spec
     provider_spec = InlineProviderSpec(
diff --git a/tests/unit/server/test_sse.py b/tests/unit/server/test_sse.py
index 60e9f4609..d42857186 100644
--- a/tests/unit/server/test_sse.py
+++ b/tests/unit/server/test_sse.py
@@ -7,13 +7,10 @@
 import asyncio
 from unittest.mock import AsyncMock, MagicMock
 
-import pytest
-
 from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.distribution.server.server import create_dynamic_typed_route, create_sse_event, sse_generator
 
 
-@pytest.mark.asyncio
 async def test_sse_generator_basic():
     # An AsyncIterator wrapped in an Awaitable, just like our web methods
     async def async_event_gen():
@@ -35,7 +32,6 @@ async def test_sse_generator_basic():
     assert seen_events[1] == create_sse_event("Test event 2")
 
 
-@pytest.mark.asyncio
 async def test_sse_generator_client_disconnected():
     # An AsyncIterator wrapped in an Awaitable, just like our web methods
     async def async_event_gen():
@@ -58,7 +54,6 @@ async def test_sse_generator_client_disconnected():
     assert seen_events[0] == create_sse_event("Test event 1")
 
 
-@pytest.mark.asyncio
 async def test_sse_generator_client_disconnected_before_response_starts():
     # Disconnect before the response starts
     async def async_event_gen():
@@ -75,7 +70,6 @@ async def test_sse_generator_client_disconnected_before_response_starts():
     assert len(seen_events) == 0
 
 
-@pytest.mark.asyncio
 async def test_sse_generator_error_before_response_starts():
     # Raise an error before the response starts
     async def async_event_gen():
@@ -93,7 +87,6 @@ async def test_sse_generator_error_before_response_starts():
     assert 'data: {"error":' in seen_events[0]
 
 
-@pytest.mark.asyncio
 async def test_paginated_response_url_setting():
     """Test that PaginatedResponse gets url set to route path."""
 
diff --git a/tests/unit/utils/inference/test_inference_store.py b/tests/unit/utils/inference/test_inference_store.py
index de619c760..730f54a05 100644
--- a/tests/unit/utils/inference/test_inference_store.py
+++ b/tests/unit/utils/inference/test_inference_store.py
@@ -42,7 +42,6 @@ def create_test_chat_completion(
     )
 
 
-@pytest.mark.asyncio
 async def test_inference_store_pagination_basic():
     """Test basic pagination functionality."""
     with TemporaryDirectory() as tmp_dir:
@@ -88,7 +87,6 @@ async def test_inference_store_pagination_basic():
         assert result3.has_more is False
 
 
-@pytest.mark.asyncio
 async def test_inference_store_pagination_ascending():
     """Test pagination with ascending order."""
     with TemporaryDirectory() as tmp_dir:
@@ -123,7 +121,6 @@ async def test_inference_store_pagination_ascending():
         assert result2.has_more is True
 
 
-@pytest.mark.asyncio
 async def test_inference_store_pagination_with_model_filter():
     """Test pagination combined with model filtering."""
     with TemporaryDirectory() as tmp_dir:
@@ -161,7 +158,6 @@ async def test_inference_store_pagination_with_model_filter():
         assert result2.has_more is False
 
 
-@pytest.mark.asyncio
 async def test_inference_store_pagination_invalid_after():
     """Test error handling for invalid 'after' parameter."""
     with TemporaryDirectory() as tmp_dir:
@@ -174,7 +170,6 @@ async def test_inference_store_pagination_invalid_after():
             await store.list_chat_completions(after="non-existent", limit=2)
 
 
-@pytest.mark.asyncio
 async def test_inference_store_pagination_no_limit():
     """Test pagination behavior when no limit is specified."""
     with TemporaryDirectory() as tmp_dir:
diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py
index 3f25e2524..44d4b30da 100644
--- a/tests/unit/utils/responses/test_responses_store.py
+++ b/tests/unit/utils/responses/test_responses_store.py
@@ -44,7 +44,6 @@ def create_test_response_input(content: str, input_id: str) -> OpenAIResponseInp
     )
 
 
-@pytest.mark.asyncio
 async def test_responses_store_pagination_basic():
     """Test basic pagination functionality for responses store."""
     with TemporaryDirectory() as tmp_dir:
@@ -90,7 +89,6 @@ async def test_responses_store_pagination_basic():
         assert result3.has_more is False
 
 
-@pytest.mark.asyncio
 async def test_responses_store_pagination_ascending():
     """Test pagination with ascending order."""
     with TemporaryDirectory() as tmp_dir:
@@ -125,7 +123,6 @@ async def test_responses_store_pagination_ascending():
         assert result2.has_more is True
 
 
-@pytest.mark.asyncio
 async def test_responses_store_pagination_with_model_filter():
     """Test pagination combined with model filtering."""
     with TemporaryDirectory() as tmp_dir:
@@ -163,7 +160,6 @@ async def test_responses_store_pagination_with_model_filter():
         assert result2.has_more is False
 
 
-@pytest.mark.asyncio
 async def test_responses_store_pagination_invalid_after():
     """Test error handling for invalid 'after' parameter."""
     with TemporaryDirectory() as tmp_dir:
@@ -176,7 +172,6 @@ async def test_responses_store_pagination_invalid_after():
             await store.list_responses(after="non-existent", limit=2)
 
 
-@pytest.mark.asyncio
 async def test_responses_store_pagination_no_limit():
     """Test pagination behavior when no limit is specified."""
     with TemporaryDirectory() as tmp_dir:
@@ -205,7 +200,6 @@ async def test_responses_store_pagination_no_limit():
         assert result.has_more is False
 
 
-@pytest.mark.asyncio
 async def test_responses_store_get_response_object():
     """Test retrieving a single response object."""
     with TemporaryDirectory() as tmp_dir:
@@ -230,7 +224,6 @@ async def test_responses_store_get_response_object():
             await store.get_response_object("non-existent")
 
 
-@pytest.mark.asyncio
 async def test_responses_store_input_items_pagination():
     """Test pagination functionality for input items."""
     with TemporaryDirectory() as tmp_dir:
@@ -308,7 +301,6 @@ async def test_responses_store_input_items_pagination():
             await store.list_response_input_items("test-resp", before="some-id", after="other-id")
 
 
-@pytest.mark.asyncio
 async def test_responses_store_input_items_before_pagination():
     """Test before pagination functionality for input items."""
     with TemporaryDirectory() as tmp_dir:
diff --git a/tests/unit/utils/sqlstore/test_sqlstore.py b/tests/unit/utils/sqlstore/test_sqlstore.py
index c4230a396..778f0b658 100644
--- a/tests/unit/utils/sqlstore/test_sqlstore.py
+++ b/tests/unit/utils/sqlstore/test_sqlstore.py
@@ -14,7 +14,6 @@ from llama_stack.providers.utils.sqlstore.sqlalchemy_sqlstore import SqlAlchemyS
 from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
 
 
-@pytest.mark.asyncio
 async def test_sqlite_sqlstore():
     with TemporaryDirectory() as tmp_dir:
         db_name = "test.db"
@@ -66,7 +65,6 @@ async def test_sqlite_sqlstore():
         assert result.has_more is False
 
 
-@pytest.mark.asyncio
 async def test_sqlstore_pagination_basic():
     """Test basic pagination functionality at the SQL store level."""
     with TemporaryDirectory() as tmp_dir:
@@ -131,7 +129,6 @@ async def test_sqlstore_pagination_basic():
         assert result3.has_more is False
 
 
-@pytest.mark.asyncio
 async def test_sqlstore_pagination_with_filter():
     """Test pagination with WHERE conditions."""
     with TemporaryDirectory() as tmp_dir:
@@ -184,7 +181,6 @@ async def test_sqlstore_pagination_with_filter():
         assert result2.has_more is False
 
 
-@pytest.mark.asyncio
 async def test_sqlstore_pagination_ascending_order():
     """Test pagination with ascending order."""
     with TemporaryDirectory() as tmp_dir:
@@ -233,7 +229,6 @@ async def test_sqlstore_pagination_ascending_order():
         assert result2.has_more is True
 
 
-@pytest.mark.asyncio
 async def test_sqlstore_pagination_multi_column_ordering_error():
     """Test that multi-column ordering raises an error when using cursor pagination."""
     with TemporaryDirectory() as tmp_dir:
@@ -271,7 +266,6 @@ async def test_sqlstore_pagination_multi_column_ordering_error():
         assert result.data[0]["id"] == "task1"
 
 
-@pytest.mark.asyncio
 async def test_sqlstore_pagination_cursor_requires_order_by():
     """Test that cursor pagination requires order_by parameter."""
     with TemporaryDirectory() as tmp_dir:
@@ -289,7 +283,6 @@ async def test_sqlstore_pagination_cursor_requires_order_by():
             )
 
 
-@pytest.mark.asyncio
 async def test_sqlstore_pagination_error_handling():
     """Test error handling for invalid columns and cursor IDs."""
     with TemporaryDirectory() as tmp_dir:
@@ -339,7 +332,6 @@ async def test_sqlstore_pagination_error_handling():
             )
 
 
-@pytest.mark.asyncio
 async def test_sqlstore_pagination_custom_key_column():
     """Test pagination with custom primary key column (not 'id')."""
     with TemporaryDirectory() as tmp_dir:
diff --git a/tests/unit/utils/test_authorized_sqlstore.py b/tests/unit/utils/test_authorized_sqlstore.py
index 61763719a..066f67a98 100644
--- a/tests/unit/utils/test_authorized_sqlstore.py
+++ b/tests/unit/utils/test_authorized_sqlstore.py
@@ -7,8 +7,6 @@
 from tempfile import TemporaryDirectory
 from unittest.mock import patch
 
-import pytest
-
 from llama_stack.distribution.access_control.access_control import default_policy, is_action_allowed
 from llama_stack.distribution.access_control.datatypes import Action
 from llama_stack.distribution.datatypes import User
@@ -18,7 +16,6 @@ from llama_stack.providers.utils.sqlstore.sqlalchemy_sqlstore import SqlAlchemyS
 from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig
 
 
-@pytest.mark.asyncio
 @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user")
 async def test_authorized_fetch_with_where_sql_access_control(mock_get_authenticated_user):
     """Test that fetch_all works correctly with where_sql for access control"""
@@ -81,7 +78,6 @@ async def test_authorized_fetch_with_where_sql_access_control(mock_get_authentic
         assert row["title"] == "User Document"
 
 
-@pytest.mark.asyncio
 @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user")
 async def test_sql_policy_consistency(mock_get_authenticated_user):
     """Test that SQL WHERE clause logic exactly matches is_action_allowed policy logic"""
@@ -168,7 +164,6 @@ async def test_sql_policy_consistency(mock_get_authenticated_user):
             )
 
 
-@pytest.mark.asyncio
 @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user")
 async def test_authorized_store_user_attribute_capture(mock_get_authenticated_user):
     """Test that user attributes are properly captured during insert"""
diff --git a/uv.lock b/uv.lock
index e77fb89f5..83e502e7f 100644
--- a/uv.lock
+++ b/uv.lock
@@ -615,6 +615,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/cb/a3/460c57f094a4a165c84a1341c373b0a4f5ec6ac244b998d5021aade89b77/ecdsa-0.19.1-py2.py3-none-any.whl", hash = "sha256:30638e27cf77b7e15c4c4cc1973720149e1033827cfd00661ca5c8cc0cdb24c3", size = 150607, upload-time = "2025-03-13T11:52:41.757Z" },
 ]
 
+[[package]]
+name = "eval-type-backport"
+version = "0.2.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/30/ea/8b0ac4469d4c347c6a385ff09dc3c048c2d021696664e26c7ee6791631b5/eval_type_backport-0.2.2.tar.gz", hash = "sha256:f0576b4cf01ebb5bd358d02314d31846af5e07678387486e2c798af0e7d849c1", size = 9079, upload-time = "2024-12-21T20:09:46.005Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ce/31/55cd413eaccd39125368be33c46de24a1f639f2e12349b0361b4678f3915/eval_type_backport-0.2.2-py3-none-any.whl", hash = "sha256:cb6ad7c393517f476f96d456d0412ea80f0a8cf96f6892834cd9340149111b0a", size = 5830, upload-time = "2024-12-21T20:09:44.175Z" },
+]
+
 [[package]]
 name = "executing"
 version = "2.2.0"
@@ -1238,6 +1247,28 @@ version = "1.4"
 source = { registry = "https://pypi.org/simple" }
 sdist = { url = "https://files.pythonhosted.org/packages/65/c6/246100fa3967074d9725b3716913bd495823547bde5047050d4c3462f994/linkify-1.4.tar.gz", hash = "sha256:9ba276ba179525f7262820d90f009604e51cd4f1466c1112b882ef7eda243d5e", size = 1749, upload-time = "2009-11-12T21:42:00.934Z" }
 
+[[package]]
+name = "litellm"
+version = "1.74.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "aiohttp" },
+    { name = "click" },
+    { name = "httpx" },
+    { name = "importlib-metadata" },
+    { name = "jinja2" },
+    { name = "jsonschema" },
+    { name = "openai" },
+    { name = "pydantic" },
+    { name = "python-dotenv" },
+    { name = "tiktoken" },
+    { name = "tokenizers" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/38/10/63cdae1b1d581ad1db51153dfd06c4e18394a3ba8de495f73f2d797ece3b/litellm-1.74.2.tar.gz", hash = "sha256:cbacffe93976c60ca674fec0a942c70b900b4ad1c8069395174049a162f255bf", size = 9230641, upload-time = "2025-07-11T03:31:07.925Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2a/f7/67689245f48b9e79bcd2f3a10a3690cb1918fb99fffd5a623ed2496bca66/litellm-1.74.2-py3-none-any.whl", hash = "sha256:29bb555b45128e4cc696e72921a6ec24e97b14e9b69e86eed6f155124ad629b1", size = 8587065, upload-time = "2025-07-11T03:31:05.598Z" },
+]
+
 [[package]]
 name = "llama-stack"
 version = "0.2.14"
@@ -1293,6 +1324,7 @@ dev = [
     { name = "pytest-cov" },
     { name = "pytest-html" },
     { name = "pytest-json-report" },
+    { name = "pytest-socket" },
     { name = "pytest-timeout" },
     { name = "ruamel-yaml" },
     { name = "ruff" },
@@ -1341,6 +1373,7 @@ unit = [
     { name = "blobfile" },
     { name = "chardet" },
     { name = "faiss-cpu" },
+    { name = "litellm" },
     { name = "mcp" },
     { name = "openai" },
     { name = "pymilvus" },
@@ -1348,6 +1381,7 @@ unit = [
     { name = "qdrant-client" },
     { name = "sqlalchemy", extra = ["asyncio"] },
     { name = "sqlite-vec" },
+    { name = "together" },
 ]
 
 [package.metadata]
@@ -1365,8 +1399,8 @@ requires-dist = [
     { name = "llama-stack-client", specifier = ">=0.2.14" },
     { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.14" },
     { name = "openai", specifier = ">=1.66" },
-    { name = "opentelemetry-exporter-otlp-proto-http" },
-    { name = "opentelemetry-sdk" },
+    { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" },
+    { name = "opentelemetry-sdk", specifier = ">=1.30.0" },
     { name = "pandas", marker = "extra == 'ui'" },
     { name = "pillow" },
     { name = "prompt-toolkit" },
@@ -1394,11 +1428,12 @@ dev = [
     { name = "black" },
     { name = "nbval" },
     { name = "pre-commit" },
-    { name = "pytest" },
-    { name = "pytest-asyncio" },
+    { name = "pytest", specifier = ">=8.4" },
+    { name = "pytest-asyncio", specifier = ">=1.0" },
     { name = "pytest-cov" },
     { name = "pytest-html" },
     { name = "pytest-json-report" },
+    { name = "pytest-socket" },
     { name = "pytest-timeout" },
     { name = "ruamel-yaml" },
     { name = "ruff" },
@@ -1446,6 +1481,7 @@ unit = [
     { name = "blobfile" },
     { name = "chardet" },
     { name = "faiss-cpu" },
+    { name = "litellm" },
     { name = "mcp" },
     { name = "openai" },
     { name = "pymilvus", specifier = ">=2.5.12" },
@@ -1454,6 +1490,7 @@ unit = [
     { name = "sqlalchemy" },
     { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" },
     { name = "sqlite-vec" },
+    { name = "together" },
 ]
 
 [[package]]
@@ -2432,29 +2469,30 @@ wheels = [
 
 [[package]]
 name = "pytest"
-version = "8.3.4"
+version = "8.4.1"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "colorama", marker = "sys_platform == 'win32'" },
     { name = "iniconfig" },
     { name = "packaging" },
     { name = "pluggy" },
+    { name = "pygments" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/05/35/30e0d83068951d90a01852cb1cef56e5d8a09d20c7f511634cc2f7e0372a/pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761", size = 1445919, upload-time = "2024-12-01T12:54:25.98Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/08/ba/45911d754e8eba3d5a841a5ce61a65a685ff1798421ac054f85aa8747dfb/pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c", size = 1517714, upload-time = "2025-06-18T05:48:06.109Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/11/92/76a1c94d3afee238333bc0a42b82935dd8f9cf8ce9e336ff87ee14d9e1cf/pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6", size = 343083, upload-time = "2024-12-01T12:54:19.735Z" },
+    { url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474, upload-time = "2025-06-18T05:48:03.955Z" },
 ]
 
 [[package]]
 name = "pytest-asyncio"
-version = "0.25.3"
+version = "1.0.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "pytest" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/f2/a8/ecbc8ede70921dd2f544ab1cadd3ff3bf842af27f87bbdea774c7baa1d38/pytest_asyncio-0.25.3.tar.gz", hash = "sha256:fc1da2cf9f125ada7e710b4ddad05518d4cee187ae9412e9ac9271003497f07a", size = 54239, upload-time = "2025-01-28T18:37:58.729Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/d0/d4/14f53324cb1a6381bef29d698987625d80052bb33932d8e7cbf9b337b17c/pytest_asyncio-1.0.0.tar.gz", hash = "sha256:d15463d13f4456e1ead2594520216b225a16f781e144f8fdf6c5bb4667c48b3f", size = 46960, upload-time = "2025-05-26T04:54:40.484Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/67/17/3493c5624e48fd97156ebaec380dcaafee9506d7e2c46218ceebbb57d7de/pytest_asyncio-0.25.3-py3-none-any.whl", hash = "sha256:9e89518e0f9bd08928f97a3482fdc4e244df17529460bc038291ccaf8f85c7c3", size = 19467, upload-time = "2025-01-28T18:37:56.798Z" },
+    { url = "https://files.pythonhosted.org/packages/30/05/ce271016e351fddc8399e546f6e23761967ee09c8c568bbfbecb0c150171/pytest_asyncio-1.0.0-py3-none-any.whl", hash = "sha256:4f024da9f1ef945e680dc68610b52550e36590a67fd31bb3b4943979a1f90ef3", size = 15976, upload-time = "2025-05-26T04:54:39.035Z" },
 ]
 
 [[package]]
@@ -2509,6 +2547,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/3e/43/7e7b2ec865caa92f67b8f0e9231a798d102724ca4c0e1f414316be1c1ef2/pytest_metadata-3.1.1-py3-none-any.whl", hash = "sha256:c8e0844db684ee1c798cfa38908d20d67d0463ecb6137c72e91f418558dd5f4b", size = 11428, upload-time = "2024-02-12T19:38:42.531Z" },
 ]
 
+[[package]]
+name = "pytest-socket"
+version = "0.7.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "pytest" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/05/ff/90c7e1e746baf3d62ce864c479fd53410b534818b9437413903596f81580/pytest_socket-0.7.0.tar.gz", hash = "sha256:71ab048cbbcb085c15a4423b73b619a8b35d6a307f46f78ea46be51b1b7e11b3", size = 12389, upload-time = "2024-01-28T20:17:23.177Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/19/58/5d14cb5cb59409e491ebe816c47bf81423cd03098ea92281336320ae5681/pytest_socket-0.7.0-py3-none-any.whl", hash = "sha256:7e0f4642177d55d317bbd58fc68c6bd9048d6eadb2d46a89307fa9221336ce45", size = 6754, upload-time = "2024-01-28T20:17:22.105Z" },
+]
+
 [[package]]
 name = "pytest-timeout"
 version = "2.4.0"
@@ -2951,6 +3001,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/58/29/93c53c098d301132196c3238c312825324740851d77a8500a2462c0fd888/setuptools-80.8.0-py3-none-any.whl", hash = "sha256:95a60484590d24103af13b686121328cc2736bee85de8936383111e421b9edc0", size = 1201470, upload-time = "2025-05-20T14:02:51.348Z" },
 ]
 
+[[package]]
+name = "shellingham"
+version = "1.5.4"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" },
+]
+
 [[package]]
 name = "six"
 version = "1.17.0"
@@ -3383,6 +3442,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b2/fe/81695a1aa331a842b582453b605175f419fe8540355886031328089d840a/sympy-1.13.1-py3-none-any.whl", hash = "sha256:db36cdc64bf61b9b24578b6f7bab1ecdd2452cf008f34faa33776680c26d66f8", size = 6189177, upload-time = "2024-07-19T09:26:48.863Z" },
 ]
 
+[[package]]
+name = "tabulate"
+version = "0.9.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/ec/fe/802052aecb21e3797b8f7902564ab6ea0d60ff8ca23952079064155d1ae1/tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c", size = 81090, upload-time = "2022-10-06T17:21:48.54Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" },
+]
+
 [[package]]
 name = "tenacity"
 version = "9.1.2"
@@ -3425,6 +3493,29 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/de/a8/8f499c179ec900783ffe133e9aab10044481679bb9aad78436d239eee716/tiktoken-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5ea0edb6f83dc56d794723286215918c1cde03712cbbafa0348b33448faf5b95", size = 894669, upload-time = "2025-02-14T06:02:47.341Z" },
 ]
 
+[[package]]
+name = "together"
+version = "1.5.21"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "aiohttp" },
+    { name = "click" },
+    { name = "eval-type-backport" },
+    { name = "filelock" },
+    { name = "numpy" },
+    { name = "pillow" },
+    { name = "pydantic" },
+    { name = "requests" },
+    { name = "rich" },
+    { name = "tabulate" },
+    { name = "tqdm" },
+    { name = "typer" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ea/53/e33c5e6d53c2e2bbd07f9dcb1979e27ac670fca0e4e238b169aa4c358ee2/together-1.5.21.tar.gz", hash = "sha256:59adb8cf4c5b77eca76b8c66a73c47c45fd828aaf4f059f33f893f8c5f68f85a", size = 69887, upload-time = "2025-07-10T21:04:43.781Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fc/31/6556a303ea39929fa016f4260eef289b620cf366a576c304507cb75b4d12/together-1.5.21-py3-none-any.whl", hash = "sha256:35e6c0072033a2e5f1105de8781e969f41cffc85dae508b6f4dc293360026872", size = 96141, upload-time = "2025-07-10T21:04:42.418Z" },
+]
+
 [[package]]
 name = "tokenizers"
 version = "0.21.1"
@@ -3643,6 +3734,21 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/aa/22/733a6fc4a6445d835242f64c490fdd30f4a08d58f2b788613de3f9170692/transformers-4.50.3-py3-none-any.whl", hash = "sha256:6111610a43dec24ef32c3df0632c6b25b07d9711c01d9e1077bdd2ff6b14a38c", size = 10180411, upload-time = "2025-03-28T18:20:59.265Z" },
 ]
 
+[[package]]
+name = "typer"
+version = "0.15.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "click" },
+    { name = "rich" },
+    { name = "shellingham" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/6c/89/c527e6c848739be8ceb5c44eb8208c52ea3515c6cf6406aa61932887bf58/typer-0.15.4.tar.gz", hash = "sha256:89507b104f9b6a0730354f27c39fae5b63ccd0c95b1ce1f1a6ba0cfd329997c3", size = 101559, upload-time = "2025-05-14T16:34:57.704Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c9/62/d4ba7afe2096d5659ec3db8b15d8665bdcb92a3c6ff0b95e99895b335a9c/typer-0.15.4-py3-none-any.whl", hash = "sha256:eb0651654dcdea706780c466cf06d8f174405a659ffff8f163cfbfee98c0e173", size = 45258, upload-time = "2025-05-14T16:34:55.583Z" },
+]
+
 [[package]]
 name = "types-requests"
 version = "2.32.0.20241016"