diff --git a/.github/workflows/integration-auth-tests.yml b/.github/workflows/integration-auth-tests.yml index 7822e4216..cf10e005c 100644 --- a/.github/workflows/integration-auth-tests.yml +++ b/.github/workflows/integration-auth-tests.yml @@ -35,7 +35,7 @@ jobs: - name: Install minikube if: ${{ matrix.auth-provider == 'kubernetes' }} - uses: medyagh/setup-minikube@cea33675329b799adccc9526aa5daccc26cd5052 # v0.0.19 + uses: medyagh/setup-minikube@e3c7f79eb1e997eabccc536a6cf318a2b0fe19d9 # v0.0.20 - name: Start minikube if: ${{ matrix.auth-provider == 'oauth2_token' }} diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh index d9a918fb5..6e794b36f 100755 --- a/llama_stack/distribution/build_container.sh +++ b/llama_stack/distribution/build_container.sh @@ -96,7 +96,7 @@ FROM $container_base WORKDIR /app # We install the Python 3.12 dev headers and build tools so that any -# C‑extension wheels (e.g. polyleven, faiss‑cpu) can compile successfully. +# C-extension wheels (e.g. polyleven, faiss-cpu) can compile successfully. RUN dnf -y update && dnf install -y iputils git net-tools wget \ vim-minimal python3.12 python3.12-pip python3.12-wheel \ @@ -169,7 +169,7 @@ if [ -n "$run_config" ]; then echo "Copying external providers directory: $external_providers_dir" cp -r "$external_providers_dir" "$BUILD_CONTEXT_DIR/providers.d" add_to_container << EOF -COPY --chmod=g+w providers.d /.llama/providers.d +COPY providers.d /.llama/providers.d EOF fi diff --git a/llama_stack/providers/remote/inference/groq/groq.py b/llama_stack/providers/remote/inference/groq/groq.py index 4b295e788..91c6b6c17 100644 --- a/llama_stack/providers/remote/inference/groq/groq.py +++ b/llama_stack/providers/remote/inference/groq/groq.py @@ -38,24 +38,18 @@ class GroqInferenceAdapter(LiteLLMOpenAIMixin): provider_data_api_key_field="groq_api_key", ) self.config = config - self._openai_client = None async def initialize(self): await super().initialize() async def shutdown(self): await super().shutdown() - if self._openai_client: - await self._openai_client.close() - self._openai_client = None def _get_openai_client(self) -> AsyncOpenAI: - if not self._openai_client: - self._openai_client = AsyncOpenAI( - base_url=f"{self.config.url}/openai/v1", - api_key=self.config.api_key, - ) - return self._openai_client + return AsyncOpenAI( + base_url=f"{self.config.url}/openai/v1", + api_key=self.get_api_key(), + ) async def openai_chat_completion( self, diff --git a/llama_stack/providers/remote/inference/openai/openai.py b/llama_stack/providers/remote/inference/openai/openai.py index 72428422f..818883919 100644 --- a/llama_stack/providers/remote/inference/openai/openai.py +++ b/llama_stack/providers/remote/inference/openai/openai.py @@ -59,9 +59,6 @@ class OpenAIInferenceAdapter(LiteLLMOpenAIMixin): # if we do not set this, users will be exposed to the # litellm specific model names, an abstraction leak. self.is_openai_compat = True - self._openai_client = AsyncOpenAI( - api_key=self.config.api_key, - ) async def initialize(self) -> None: await super().initialize() @@ -69,6 +66,11 @@ class OpenAIInferenceAdapter(LiteLLMOpenAIMixin): async def shutdown(self) -> None: await super().shutdown() + def _get_openai_client(self) -> AsyncOpenAI: + return AsyncOpenAI( + api_key=self.get_api_key(), + ) + async def openai_completion( self, model: str, @@ -120,7 +122,7 @@ class OpenAIInferenceAdapter(LiteLLMOpenAIMixin): user=user, suffix=suffix, ) - return await self._openai_client.completions.create(**params) + return await self._get_openai_client().completions.create(**params) async def openai_chat_completion( self, @@ -176,7 +178,7 @@ class OpenAIInferenceAdapter(LiteLLMOpenAIMixin): top_p=top_p, user=user, ) - return await self._openai_client.chat.completions.create(**params) + return await self._get_openai_client().chat.completions.create(**params) async def openai_embeddings( self, @@ -204,7 +206,7 @@ class OpenAIInferenceAdapter(LiteLLMOpenAIMixin): params["user"] = user # Call OpenAI embeddings API - response = await self._openai_client.embeddings.create(**params) + response = await self._get_openai_client().embeddings.create(**params) data = [] for i, embedding_data in enumerate(response.data): diff --git a/llama_stack/providers/remote/inference/sambanova/sambanova.py b/llama_stack/providers/remote/inference/sambanova/sambanova.py index 20f863665..9c2dda889 100644 --- a/llama_stack/providers/remote/inference/sambanova/sambanova.py +++ b/llama_stack/providers/remote/inference/sambanova/sambanova.py @@ -7,6 +7,7 @@ import json from collections.abc import Iterable +import requests from openai.types.chat import ( ChatCompletionAssistantMessageParam as OpenAIChatCompletionAssistantMessage, ) @@ -56,6 +57,7 @@ from llama_stack.apis.inference import ( ToolResponseMessage, UserMessage, ) +from llama_stack.apis.models import Model from llama_stack.log import get_logger from llama_stack.models.llama.datatypes import BuiltinTool from llama_stack.providers.utils.inference.litellm_openai_mixin import LiteLLMOpenAIMixin @@ -176,10 +178,11 @@ class SambaNovaInferenceAdapter(LiteLLMOpenAIMixin): def __init__(self, config: SambaNovaImplConfig): self.config = config + self.environment_available_models = [] LiteLLMOpenAIMixin.__init__( self, model_entries=MODEL_ENTRIES, - api_key_from_config=self.config.api_key, + api_key_from_config=self.config.api_key.get_secret_value() if self.config.api_key else None, provider_data_api_key_field="sambanova_api_key", ) @@ -246,6 +249,22 @@ class SambaNovaInferenceAdapter(LiteLLMOpenAIMixin): **get_sampling_options(request.sampling_params), } + async def register_model(self, model: Model) -> Model: + model_id = self.get_provider_model_id(model.provider_resource_id) + + list_models_url = self.config.url + "/models" + if len(self.environment_available_models) == 0: + try: + response = requests.get(list_models_url) + response.raise_for_status() + except requests.exceptions.RequestException as e: + raise RuntimeError(f"Request to {list_models_url} failed") from e + self.environment_available_models = [model.get("id") for model in response.json().get("data", {})] + + if model_id.split("sambanova/")[-1] not in self.environment_available_models: + logger.warning(f"Model {model_id} not available in {list_models_url}") + return model + async def initialize(self): await super().initialize() diff --git a/llama_stack/providers/remote/inference/together/together.py b/llama_stack/providers/remote/inference/together/together.py index 9e6877b7c..e1eb934c5 100644 --- a/llama_stack/providers/remote/inference/together/together.py +++ b/llama_stack/providers/remote/inference/together/together.py @@ -68,19 +68,12 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi def __init__(self, config: TogetherImplConfig) -> None: ModelRegistryHelper.__init__(self, MODEL_ENTRIES) self.config = config - self._client = None - self._openai_client = None async def initialize(self) -> None: pass async def shutdown(self) -> None: - if self._client: - # Together client has no close method, so just set to None - self._client = None - if self._openai_client: - await self._openai_client.close() - self._openai_client = None + pass async def completion( self, @@ -108,29 +101,25 @@ class TogetherInferenceAdapter(ModelRegistryHelper, Inference, NeedsRequestProvi return await self._nonstream_completion(request) def _get_client(self) -> AsyncTogether: - if not self._client: - together_api_key = None - config_api_key = self.config.api_key.get_secret_value() if self.config.api_key else None - if config_api_key: - together_api_key = config_api_key - else: - provider_data = self.get_request_provider_data() - if provider_data is None or not provider_data.together_api_key: - raise ValueError( - 'Pass Together API Key in the header X-LlamaStack-Provider-Data as { "together_api_key": }' - ) - together_api_key = provider_data.together_api_key - self._client = AsyncTogether(api_key=together_api_key) - return self._client + together_api_key = None + config_api_key = self.config.api_key.get_secret_value() if self.config.api_key else None + if config_api_key: + together_api_key = config_api_key + else: + provider_data = self.get_request_provider_data() + if provider_data is None or not provider_data.together_api_key: + raise ValueError( + 'Pass Together API Key in the header X-LlamaStack-Provider-Data as { "together_api_key": }' + ) + together_api_key = provider_data.together_api_key + return AsyncTogether(api_key=together_api_key) def _get_openai_client(self) -> AsyncOpenAI: - if not self._openai_client: - together_client = self._get_client().client - self._openai_client = AsyncOpenAI( - base_url=together_client.base_url, - api_key=together_client.api_key, - ) - return self._openai_client + together_client = self._get_client().client + return AsyncOpenAI( + base_url=together_client.base_url, + api_key=together_client.api_key, + ) async def _nonstream_completion(self, request: CompletionRequest) -> ChatCompletionResponse: params = await self._get_params(request) diff --git a/llama_stack/providers/remote/safety/sambanova/sambanova.py b/llama_stack/providers/remote/safety/sambanova/sambanova.py index 84c8267ae..1a65f6aa1 100644 --- a/llama_stack/providers/remote/safety/sambanova/sambanova.py +++ b/llama_stack/providers/remote/safety/sambanova/sambanova.py @@ -33,6 +33,7 @@ CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?" class SambaNovaSafetyAdapter(Safety, ShieldsProtocolPrivate, NeedsRequestProviderData): def __init__(self, config: SambaNovaSafetyConfig) -> None: self.config = config + self.environment_available_models = [] async def initialize(self) -> None: pass @@ -54,18 +55,18 @@ class SambaNovaSafetyAdapter(Safety, ShieldsProtocolPrivate, NeedsRequestProvide async def register_shield(self, shield: Shield) -> None: list_models_url = self.config.url + "/models" - try: - response = requests.get(list_models_url) - response.raise_for_status() - except requests.exceptions.RequestException as e: - raise RuntimeError(f"Request to {list_models_url} failed") from e - available_models = [model.get("id") for model in response.json().get("data", {})] + if len(self.environment_available_models) == 0: + try: + response = requests.get(list_models_url) + response.raise_for_status() + except requests.exceptions.RequestException as e: + raise RuntimeError(f"Request to {list_models_url} failed") from e + self.environment_available_models = [model.get("id") for model in response.json().get("data", {})] if ( - len(available_models) == 0 - or "guard" not in shield.provider_resource_id.lower() - or shield.provider_resource_id.split("sambanova/")[-1] not in available_models + "guard" not in shield.provider_resource_id.lower() + or shield.provider_resource_id.split("sambanova/")[-1] not in self.environment_available_models ): - raise ValueError(f"Shield {shield.provider_resource_id} not found in SambaNova") + logger.warning(f"Shield {shield.provider_resource_id} not available in {list_models_url}") async def run_shield( self, shield_id: str, messages: list[Message], params: dict[str, Any] | None = None diff --git a/llama_stack/ui/app/logs/vector-stores/[id]/page.tsx b/llama_stack/ui/app/logs/vector-stores/[id]/page.tsx new file mode 100644 index 000000000..f27c9d802 --- /dev/null +++ b/llama_stack/ui/app/logs/vector-stores/[id]/page.tsx @@ -0,0 +1,82 @@ +"use client"; + +import { useEffect, useState } from "react"; +import { useParams, useRouter } from "next/navigation"; +import { useAuthClient } from "@/hooks/use-auth-client"; +import type { VectorStore } from "llama-stack-client/resources/vector-stores/vector-stores"; +import type { VectorStoreFile } from "llama-stack-client/resources/vector-stores/files"; +import { VectorStoreDetailView } from "@/components/vector-stores/vector-store-detail"; + +export default function VectorStoreDetailPage() { + const params = useParams(); + const id = params.id as string; + const client = useAuthClient(); + const router = useRouter(); + + const [store, setStore] = useState(null); + const [files, setFiles] = useState([]); + const [isLoadingStore, setIsLoadingStore] = useState(true); + const [isLoadingFiles, setIsLoadingFiles] = useState(true); + const [errorStore, setErrorStore] = useState(null); + const [errorFiles, setErrorFiles] = useState(null); + + useEffect(() => { + if (!id) { + setErrorStore(new Error("Vector Store ID is missing.")); + setIsLoadingStore(false); + return; + } + const fetchStore = async () => { + setIsLoadingStore(true); + setErrorStore(null); + try { + const response = await client.vectorStores.retrieve(id); + setStore(response as VectorStore); + } catch (err) { + setErrorStore( + err instanceof Error + ? err + : new Error("Failed to load vector store."), + ); + } finally { + setIsLoadingStore(false); + } + }; + fetchStore(); + }, [id, client]); + + useEffect(() => { + if (!id) { + setErrorFiles(new Error("Vector Store ID is missing.")); + setIsLoadingFiles(false); + return; + } + const fetchFiles = async () => { + setIsLoadingFiles(true); + setErrorFiles(null); + try { + const result = await client.vectorStores.files.list(id as any); + setFiles((result as any).data); + } catch (err) { + setErrorFiles( + err instanceof Error ? err : new Error("Failed to load files."), + ); + } finally { + setIsLoadingFiles(false); + } + }; + fetchFiles(); + }, [id]); + + return ( + + ); +} diff --git a/llama_stack/ui/app/logs/vector-stores/layout.tsx b/llama_stack/ui/app/logs/vector-stores/layout.tsx new file mode 100644 index 000000000..9245f5486 --- /dev/null +++ b/llama_stack/ui/app/logs/vector-stores/layout.tsx @@ -0,0 +1,16 @@ +"use client"; + +import React from "react"; +import LogsLayout from "@/components/layout/logs-layout"; + +export default function VectorStoresLayout({ + children, +}: { + children: React.ReactNode; +}) { + return ( + + {children} + + ); +} diff --git a/llama_stack/ui/app/logs/vector-stores/page.tsx b/llama_stack/ui/app/logs/vector-stores/page.tsx new file mode 100644 index 000000000..29e1fabd6 --- /dev/null +++ b/llama_stack/ui/app/logs/vector-stores/page.tsx @@ -0,0 +1,121 @@ +"use client"; + +import React from "react"; +import { useAuthClient } from "@/hooks/use-auth-client"; +import type { + ListVectorStoresResponse, + VectorStore, +} from "llama-stack-client/resources/vector-stores/vector-stores"; +import { useRouter } from "next/navigation"; +import { usePagination } from "@/hooks/use-pagination"; +import { + Table, + TableBody, + TableCaption, + TableCell, + TableHead, + TableHeader, + TableRow, +} from "@/components/ui/table"; +import { Skeleton } from "@/components/ui/skeleton"; + +export default function VectorStoresPage() { + const client = useAuthClient(); + const router = useRouter(); + const { + data: stores, + status, + hasMore, + error, + loadMore, + } = usePagination({ + limit: 20, + order: "desc", + fetchFunction: async (client, params) => { + const response = await client.vectorStores.list({ + after: params.after, + limit: params.limit, + order: params.order, + } as any); + return response as ListVectorStoresResponse; + }, + errorMessagePrefix: "vector stores", + }); + + // Auto-load all pages for infinite scroll behavior (like Responses) + React.useEffect(() => { + if (status === "idle" && hasMore) { + loadMore(); + } + }, [status, hasMore, loadMore]); + + if (status === "loading") { + return ( +
+ + + +
+ ); + } + + if (status === "error") { + return
Error: {error?.message}
; + } + + if (!stores || stores.length === 0) { + return

No vector stores found.

; + } + + return ( +
+ + + + ID + Name + Created + Completed + Cancelled + Failed + In Progress + Total + Usage Bytes + Provider ID + Provider Vector DB ID + + + + {stores.map((store) => { + const fileCounts = store.file_counts; + const metadata = store.metadata || {}; + const providerId = metadata.provider_id ?? ""; + const providerDbId = metadata.provider_vector_db_id ?? ""; + + return ( + router.push(`/logs/vector-stores/${store.id}`)} + className="cursor-pointer hover:bg-muted/50" + > + {store.id} + {store.name} + + {new Date(store.created_at * 1000).toLocaleString()} + + {fileCounts.completed} + {fileCounts.cancelled} + {fileCounts.failed} + {fileCounts.in_progress} + {fileCounts.total} + {store.usage_bytes} + {providerId} + {providerDbId} + + ); + })} + +
+
+ ); +} diff --git a/llama_stack/ui/components/layout/app-sidebar.tsx b/llama_stack/ui/components/layout/app-sidebar.tsx index 1c53d6cc5..532e43dbd 100644 --- a/llama_stack/ui/components/layout/app-sidebar.tsx +++ b/llama_stack/ui/components/layout/app-sidebar.tsx @@ -1,6 +1,11 @@ "use client"; -import { MessageSquareText, MessagesSquare, MoveUpRight } from "lucide-react"; +import { + MessageSquareText, + MessagesSquare, + MoveUpRight, + Database, +} from "lucide-react"; import Link from "next/link"; import { usePathname } from "next/navigation"; import { cn } from "@/lib/utils"; @@ -28,6 +33,11 @@ const logItems = [ url: "/logs/responses", icon: MessagesSquare, }, + { + title: "Vector Stores", + url: "/logs/vector-stores", + icon: Database, + }, { title: "Documentation", url: "https://llama-stack.readthedocs.io/en/latest/references/api_reference/index.html", @@ -57,13 +67,13 @@ export function AppSidebar() { className={cn( "justify-start", isActive && - "bg-gray-200 hover:bg-gray-200 text-primary hover:text-primary", + "bg-gray-200 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-700 text-gray-900 dark:text-gray-100", )} > diff --git a/llama_stack/ui/components/layout/detail-layout.tsx b/llama_stack/ui/components/layout/detail-layout.tsx index 58b912703..3013195a2 100644 --- a/llama_stack/ui/components/layout/detail-layout.tsx +++ b/llama_stack/ui/components/layout/detail-layout.tsx @@ -93,7 +93,9 @@ export function PropertyItem({ > {label}:{" "} {typeof value === "string" || typeof value === "number" ? ( - {value} + + {value} + ) : ( value )} @@ -112,7 +114,9 @@ export function PropertiesCard({ children }: PropertiesCardProps) { Properties -
    {children}
+
    + {children} +
); diff --git a/llama_stack/ui/components/ui/message-components.tsx b/llama_stack/ui/components/ui/message-components.tsx index 50ccd623e..39cb570b7 100644 --- a/llama_stack/ui/components/ui/message-components.tsx +++ b/llama_stack/ui/components/ui/message-components.tsx @@ -17,10 +17,10 @@ export const MessageBlock: React.FC = ({ }) => { return (
-

+

{label} {labelDetail && ( - + {labelDetail} )} diff --git a/llama_stack/ui/components/vector-stores/vector-store-detail.tsx b/llama_stack/ui/components/vector-stores/vector-store-detail.tsx new file mode 100644 index 000000000..7c5c91dd3 --- /dev/null +++ b/llama_stack/ui/components/vector-stores/vector-store-detail.tsx @@ -0,0 +1,128 @@ +"use client"; + +import type { VectorStore } from "llama-stack-client/resources/vector-stores/vector-stores"; +import type { VectorStoreFile } from "llama-stack-client/resources/vector-stores/files"; +import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"; +import { Skeleton } from "@/components/ui/skeleton"; +import { + DetailLoadingView, + DetailErrorView, + DetailNotFoundView, + DetailLayout, + PropertiesCard, + PropertyItem, +} from "@/components/layout/detail-layout"; +import { + Table, + TableBody, + TableCaption, + TableCell, + TableHead, + TableHeader, + TableRow, +} from "@/components/ui/table"; + +interface VectorStoreDetailViewProps { + store: VectorStore | null; + files: VectorStoreFile[]; + isLoadingStore: boolean; + isLoadingFiles: boolean; + errorStore: Error | null; + errorFiles: Error | null; + id: string; +} + +export function VectorStoreDetailView({ + store, + files, + isLoadingStore, + isLoadingFiles, + errorStore, + errorFiles, + id, +}: VectorStoreDetailViewProps) { + const title = "Vector Store Details"; + + if (errorStore) { + return ; + } + if (isLoadingStore) { + return ; + } + if (!store) { + return ; + } + + const mainContent = ( + <> + + + Files + + + {isLoadingFiles ? ( + + ) : errorFiles ? ( +

+ Error loading files: {errorFiles.message} +
+ ) : files.length > 0 ? ( + + Files in this vector store + + + ID + Status + Created + Usage Bytes + + + + {files.map((file) => ( + + {file.id} + {file.status} + + {new Date(file.created_at * 1000).toLocaleString()} + + {file.usage_bytes} + + ))} + +
+ ) : ( +

+ No files in this vector store. +

+ )} + + + + ); + + const sidebar = ( + + + + + + + + + + + ); + + return ( + + ); +} diff --git a/llama_stack/ui/package-lock.json b/llama_stack/ui/package-lock.json index 8fd5fb56c..158569241 100644 --- a/llama_stack/ui/package-lock.json +++ b/llama_stack/ui/package-lock.json @@ -15,7 +15,7 @@ "@radix-ui/react-tooltip": "^1.2.6", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", - "llama-stack-client": "0.2.13", + "llama-stack-client": "^0.2.14", "lucide-react": "^0.510.0", "next": "15.3.3", "next-auth": "^4.24.11", @@ -676,406 +676,6 @@ "tslib": "^2.4.0" } }, - "node_modules/@esbuild/aix-ppc64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/aix-ppc64/-/aix-ppc64-0.25.5.tgz", - "integrity": "sha512-9o3TMmpmftaCMepOdA5k/yDw8SfInyzWWTjYTFCX3kPSDJMROQTb8jg+h9Cnwnmm1vOzvxN7gIfB5V2ewpjtGA==", - "cpu": [ - "ppc64" - ], - "license": "MIT", - "optional": true, - "os": [ - "aix" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/android-arm": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm/-/android-arm-0.25.5.tgz", - "integrity": "sha512-AdJKSPeEHgi7/ZhuIPtcQKr5RQdo6OO2IL87JkianiMYMPbCtot9fxPbrMiBADOWWm3T2si9stAiVsGbTQFkbA==", - "cpu": [ - "arm" - ], - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/android-arm64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/android-arm64/-/android-arm64-0.25.5.tgz", - "integrity": "sha512-VGzGhj4lJO+TVGV1v8ntCZWJktV7SGCs3Pn1GRWI1SBFtRALoomm8k5E9Pmwg3HOAal2VDc2F9+PM/rEY6oIDg==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/android-x64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/android-x64/-/android-x64-0.25.5.tgz", - "integrity": "sha512-D2GyJT1kjvO//drbRT3Hib9XPwQeWd9vZoBJn+bu/lVsOZ13cqNdDeqIF/xQ5/VmWvMduP6AmXvylO/PIc2isw==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "android" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/darwin-arm64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-arm64/-/darwin-arm64-0.25.5.tgz", - "integrity": "sha512-GtaBgammVvdF7aPIgH2jxMDdivezgFu6iKpmT+48+F8Hhg5J/sfnDieg0aeG/jfSvkYQU2/pceFPDKlqZzwnfQ==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/darwin-x64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/darwin-x64/-/darwin-x64-0.25.5.tgz", - "integrity": "sha512-1iT4FVL0dJ76/q1wd7XDsXrSW+oLoquptvh4CLR4kITDtqi2e/xwXwdCVH8hVHU43wgJdsq7Gxuzcs6Iq/7bxQ==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "darwin" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/freebsd-arm64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-arm64/-/freebsd-arm64-0.25.5.tgz", - "integrity": "sha512-nk4tGP3JThz4La38Uy/gzyXtpkPW8zSAmoUhK9xKKXdBCzKODMc2adkB2+8om9BDYugz+uGV7sLmpTYzvmz6Sw==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/freebsd-x64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/freebsd-x64/-/freebsd-x64-0.25.5.tgz", - "integrity": "sha512-PrikaNjiXdR2laW6OIjlbeuCPrPaAl0IwPIaRv+SMV8CiM8i2LqVUHFC1+8eORgWyY7yhQY+2U2fA55mBzReaw==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "freebsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-arm": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm/-/linux-arm-0.25.5.tgz", - "integrity": "sha512-cPzojwW2okgh7ZlRpcBEtsX7WBuqbLrNXqLU89GxWbNt6uIg78ET82qifUy3W6OVww6ZWobWub5oqZOVtwolfw==", - "cpu": [ - "arm" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-arm64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-arm64/-/linux-arm64-0.25.5.tgz", - "integrity": "sha512-Z9kfb1v6ZlGbWj8EJk9T6czVEjjq2ntSYLY2cw6pAZl4oKtfgQuS4HOq41M/BcoLPzrUbNd+R4BXFyH//nHxVg==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-ia32": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ia32/-/linux-ia32-0.25.5.tgz", - "integrity": "sha512-sQ7l00M8bSv36GLV95BVAdhJ2QsIbCuCjh/uYrWiMQSUuV+LpXwIqhgJDcvMTj+VsQmqAHL2yYaasENvJ7CDKA==", - "cpu": [ - "ia32" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-loong64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-loong64/-/linux-loong64-0.25.5.tgz", - "integrity": "sha512-0ur7ae16hDUC4OL5iEnDb0tZHDxYmuQyhKhsPBV8f99f6Z9KQM02g33f93rNH5A30agMS46u2HP6qTdEt6Q1kg==", - "cpu": [ - "loong64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-mips64el": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-mips64el/-/linux-mips64el-0.25.5.tgz", - "integrity": "sha512-kB/66P1OsHO5zLz0i6X0RxlQ+3cu0mkxS3TKFvkb5lin6uwZ/ttOkP3Z8lfR9mJOBk14ZwZ9182SIIWFGNmqmg==", - "cpu": [ - "mips64el" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-ppc64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-ppc64/-/linux-ppc64-0.25.5.tgz", - "integrity": "sha512-UZCmJ7r9X2fe2D6jBmkLBMQetXPXIsZjQJCjgwpVDz+YMcS6oFR27alkgGv3Oqkv07bxdvw7fyB71/olceJhkQ==", - "cpu": [ - "ppc64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-riscv64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-riscv64/-/linux-riscv64-0.25.5.tgz", - "integrity": "sha512-kTxwu4mLyeOlsVIFPfQo+fQJAV9mh24xL+y+Bm6ej067sYANjyEw1dNHmvoqxJUCMnkBdKpvOn0Ahql6+4VyeA==", - "cpu": [ - "riscv64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-s390x": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-s390x/-/linux-s390x-0.25.5.tgz", - "integrity": "sha512-K2dSKTKfmdh78uJ3NcWFiqyRrimfdinS5ErLSn3vluHNeHVnBAFWC8a4X5N+7FgVE1EjXS1QDZbpqZBjfrqMTQ==", - "cpu": [ - "s390x" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/linux-x64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/linux-x64/-/linux-x64-0.25.5.tgz", - "integrity": "sha512-uhj8N2obKTE6pSZ+aMUbqq+1nXxNjZIIjCjGLfsWvVpy7gKCOL6rsY1MhRh9zLtUtAI7vpgLMK6DxjO8Qm9lJw==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "linux" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/netbsd-arm64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-arm64/-/netbsd-arm64-0.25.5.tgz", - "integrity": "sha512-pwHtMP9viAy1oHPvgxtOv+OkduK5ugofNTVDilIzBLpoWAM16r7b/mxBvfpuQDpRQFMfuVr5aLcn4yveGvBZvw==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "netbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/netbsd-x64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/netbsd-x64/-/netbsd-x64-0.25.5.tgz", - "integrity": "sha512-WOb5fKrvVTRMfWFNCroYWWklbnXH0Q5rZppjq0vQIdlsQKuw6mdSihwSo4RV/YdQ5UCKKvBy7/0ZZYLBZKIbwQ==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "netbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/openbsd-arm64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-arm64/-/openbsd-arm64-0.25.5.tgz", - "integrity": "sha512-7A208+uQKgTxHd0G0uqZO8UjK2R0DDb4fDmERtARjSHWxqMTye4Erz4zZafx7Di9Cv+lNHYuncAkiGFySoD+Mw==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "openbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/openbsd-x64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/openbsd-x64/-/openbsd-x64-0.25.5.tgz", - "integrity": "sha512-G4hE405ErTWraiZ8UiSoesH8DaCsMm0Cay4fsFWOOUcz8b8rC6uCvnagr+gnioEjWn0wC+o1/TAHt+It+MpIMg==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "openbsd" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/sunos-x64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/sunos-x64/-/sunos-x64-0.25.5.tgz", - "integrity": "sha512-l+azKShMy7FxzY0Rj4RCt5VD/q8mG/e+mDivgspo+yL8zW7qEwctQ6YqKX34DTEleFAvCIUviCFX1SDZRSyMQA==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "sunos" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/win32-arm64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/win32-arm64/-/win32-arm64-0.25.5.tgz", - "integrity": "sha512-O2S7SNZzdcFG7eFKgvwUEZ2VG9D/sn/eIiz8XRZ1Q/DO5a3s76Xv0mdBzVM5j5R639lXQmPmSo0iRpHqUUrsxw==", - "cpu": [ - "arm64" - ], - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/win32-ia32": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/win32-ia32/-/win32-ia32-0.25.5.tgz", - "integrity": "sha512-onOJ02pqs9h1iMJ1PQphR+VZv8qBMQ77Klcsqv9CNW2w6yLqoURLcgERAIurY6QE63bbLuqgP9ATqajFLK5AMQ==", - "cpu": [ - "ia32" - ], - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=18" - } - }, - "node_modules/@esbuild/win32-x64": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/@esbuild/win32-x64/-/win32-x64-0.25.5.tgz", - "integrity": "sha512-TXv6YnJ8ZMVdX+SXWVBo/0p8LTcrUYngpWjvm91TMjjBQii7Oz11Lw5lbDV5Y0TzuhSJHwiH4hEtC1I42mMS0g==", - "cpu": [ - "x64" - ], - "license": "MIT", - "optional": true, - "os": [ - "win32" - ], - "engines": { - "node": ">=18" - } - }, "node_modules/@eslint-community/eslint-utils": { "version": "4.7.0", "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.7.0.tgz", @@ -5999,46 +5599,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/esbuild": { - "version": "0.25.5", - "resolved": "https://registry.npmjs.org/esbuild/-/esbuild-0.25.5.tgz", - "integrity": "sha512-P8OtKZRv/5J5hhz0cUAdu/cLuPIKXpQl1R9pZtvmHWQvrAUVd0UNIPT4IB4W3rNOqVO0rlqHmCIbSwxh/c9yUQ==", - "hasInstallScript": true, - "license": "MIT", - "bin": { - "esbuild": "bin/esbuild" - }, - "engines": { - "node": ">=18" - }, - "optionalDependencies": { - "@esbuild/aix-ppc64": "0.25.5", - "@esbuild/android-arm": "0.25.5", - "@esbuild/android-arm64": "0.25.5", - "@esbuild/android-x64": "0.25.5", - "@esbuild/darwin-arm64": "0.25.5", - "@esbuild/darwin-x64": "0.25.5", - "@esbuild/freebsd-arm64": "0.25.5", - "@esbuild/freebsd-x64": "0.25.5", - "@esbuild/linux-arm": "0.25.5", - "@esbuild/linux-arm64": "0.25.5", - "@esbuild/linux-ia32": "0.25.5", - "@esbuild/linux-loong64": "0.25.5", - "@esbuild/linux-mips64el": "0.25.5", - "@esbuild/linux-ppc64": "0.25.5", - "@esbuild/linux-riscv64": "0.25.5", - "@esbuild/linux-s390x": "0.25.5", - "@esbuild/linux-x64": "0.25.5", - "@esbuild/netbsd-arm64": "0.25.5", - "@esbuild/netbsd-x64": "0.25.5", - "@esbuild/openbsd-arm64": "0.25.5", - "@esbuild/openbsd-x64": "0.25.5", - "@esbuild/sunos-x64": "0.25.5", - "@esbuild/win32-arm64": "0.25.5", - "@esbuild/win32-ia32": "0.25.5", - "@esbuild/win32-x64": "0.25.5" - } - }, "node_modules/escalade": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.2.0.tgz", @@ -6993,6 +6553,7 @@ "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", "integrity": "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==", + "dev": true, "hasInstallScript": true, "license": "MIT", "optional": true, @@ -7154,6 +6715,7 @@ "version": "4.10.0", "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.10.0.tgz", "integrity": "sha512-kGzZ3LWWQcGIAmg6iWvXn0ei6WDtV26wzHRMwDSzmAbcXrTEXxHy6IehI6/4eT6VRKyMP1eF1VqwrVUmE/LR7A==", + "dev": true, "license": "MIT", "dependencies": { "resolve-pkg-maps": "^1.0.0" @@ -9537,9 +9099,10 @@ "license": "MIT" }, "node_modules/llama-stack-client": { - "version": "0.2.13", - "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.13.tgz", - "integrity": "sha512-R1rTFLwgUimr+KjEUkzUvFL6vLASwS9qj3UDSVkJ5BmrKAs5GwVAMeL7yZaTBXGuPUVh124WSlC4d9H0FjWqLA==", + "version": "0.2.14", + "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.2.14.tgz", + "integrity": "sha512-bVU3JHp+EPEKR0Vb9vcd9ZyQj/72jSDuptKLwOXET9WrkphIQ8xuW5ueecMTgq8UEls3lwB3HiZM2cDOR9eDsQ==", + "license": "Apache-2.0", "dependencies": { "@types/node": "^18.11.18", "@types/node-fetch": "^2.6.4", @@ -9547,8 +9110,7 @@ "agentkeepalive": "^4.2.1", "form-data-encoder": "1.7.2", "formdata-node": "^4.3.2", - "node-fetch": "^2.6.7", - "tsx": "^4.19.2" + "node-fetch": "^2.6.7" } }, "node_modules/llama-stack-client/node_modules/@types/node": { @@ -11148,6 +10710,7 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", "integrity": "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw==", + "dev": true, "license": "MIT", "funding": { "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" @@ -12198,25 +11761,6 @@ "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", "license": "0BSD" }, - "node_modules/tsx": { - "version": "4.19.4", - "resolved": "https://registry.npmjs.org/tsx/-/tsx-4.19.4.tgz", - "integrity": "sha512-gK5GVzDkJK1SI1zwHf32Mqxf2tSJkNx+eYcNly5+nHvWqXUJYUkWBQtKauoESz3ymezAI++ZwT855x5p5eop+Q==", - "license": "MIT", - "dependencies": { - "esbuild": "~0.25.0", - "get-tsconfig": "^4.7.5" - }, - "bin": { - "tsx": "dist/cli.mjs" - }, - "engines": { - "node": ">=18.0.0" - }, - "optionalDependencies": { - "fsevents": "~2.3.3" - } - }, "node_modules/tw-animate-css": { "version": "1.2.9", "resolved": "https://registry.npmjs.org/tw-animate-css/-/tw-animate-css-1.2.9.tgz", diff --git a/llama_stack/ui/package.json b/llama_stack/ui/package.json index 9524ce0a5..b38efe309 100644 --- a/llama_stack/ui/package.json +++ b/llama_stack/ui/package.json @@ -20,7 +20,7 @@ "@radix-ui/react-tooltip": "^1.2.6", "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", - "llama-stack-client": "0.2.13", + "llama-stack-client": "^0.2.14", "lucide-react": "^0.510.0", "next": "15.3.3", "next-auth": "^4.24.11", diff --git a/pyproject.toml b/pyproject.toml index d84a823a3..2974ff996 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,8 +42,8 @@ dependencies = [ "h11>=0.16.0", "python-multipart>=0.0.20", # For fastapi Form "uvicorn>=0.34.0", # server - "opentelemetry-sdk", # server - "opentelemetry-exporter-otlp-proto-http", # server + "opentelemetry-sdk>=1.30.0", # server + "opentelemetry-exporter-otlp-proto-http>=1.30.0", # server "aiosqlite>=0.21.0", # server - for metadata store "asyncpg", # for metadata store ] @@ -58,12 +58,13 @@ ui = [ [dependency-groups] dev = [ - "pytest", + "pytest>=8.4", "pytest-timeout", - "pytest-asyncio", + "pytest-asyncio>=1.0", "pytest-cov", "pytest-html", "pytest-json-report", + "pytest-socket", # For blocking network access in unit tests "nbval", # For notebook testing "black", "ruff", @@ -87,6 +88,8 @@ unit = [ "blobfile", "faiss-cpu", "pymilvus>=2.5.12", + "litellm", + "together", ] # These are the core dependencies required for running integration tests. They are shared across all # providers. If a provider requires additional dependencies, please add them to your environment @@ -339,3 +342,9 @@ warn_required_dynamic_aliases = true [tool.ruff.lint.pep8-naming] classmethod-decorators = ["classmethod", "pydantic.field_validator"] + +[tool.pytest.ini_options] +asyncio_mode = "auto" +markers = [ + "allow_network: Allow network access for specific unit tests", +] diff --git a/scripts/unit-tests.sh b/scripts/unit-tests.sh index 1fc3ff7fc..68d6458fc 100755 --- a/scripts/unit-tests.sh +++ b/scripts/unit-tests.sh @@ -16,4 +16,4 @@ if [ $FOUND_PYTHON -ne 0 ]; then uv python install "$PYTHON_VERSION" fi -uv run --python "$PYTHON_VERSION" --with-editable . --group unit pytest --asyncio-mode=auto -s -v tests/unit/ $@ +uv run --python "$PYTHON_VERSION" --with-editable . --group unit pytest -s -v tests/unit/ $@ diff --git a/tests/integration/agents/test_persistence.py b/tests/integration/agents/test_persistence.py index ef35c97a5..49d9d42d0 100644 --- a/tests/integration/agents/test_persistence.py +++ b/tests/integration/agents/test_persistence.py @@ -44,7 +44,6 @@ def common_params(inference_model): ) -@pytest.mark.asyncio @pytest.mark.skip(reason="This test needs to be migrated to api / client-sdk world") async def test_delete_agents_and_sessions(self, agents_stack, common_params): agents_impl = agents_stack.impls[Api.agents] @@ -73,7 +72,6 @@ async def test_delete_agents_and_sessions(self, agents_stack, common_params): assert agent_response is None -@pytest.mark.asyncio @pytest.mark.skip(reason="This test needs to be migrated to api / client-sdk world") async def test_get_agent_turns_and_steps(self, agents_stack, sample_messages, common_params): agents_impl = agents_stack.impls[Api.agents] diff --git a/tests/integration/inference/test_openai_completion.py b/tests/integration/inference/test_openai_completion.py index 05aee5096..e82714ffd 100644 --- a/tests/integration/inference/test_openai_completion.py +++ b/tests/integration/inference/test_openai_completion.py @@ -71,7 +71,6 @@ def skip_if_model_doesnt_support_openai_chat_completion(client_with_models, mode "remote::cerebras", "remote::databricks", "remote::runpod", - "remote::sambanova", "remote::tgi", ): pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI chat completions.") diff --git a/tests/integration/inspect/test_inspect.py b/tests/integration/inspect/test_inspect.py index da704178d..1597a319b 100644 --- a/tests/integration/inspect/test_inspect.py +++ b/tests/integration/inspect/test_inspect.py @@ -4,20 +4,17 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import pytest from llama_stack_client import LlamaStackClient from llama_stack import LlamaStackAsLibraryClient class TestInspect: - @pytest.mark.asyncio def test_health(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient): health = llama_stack_client.inspect.health() assert health is not None assert health.status == "OK" - @pytest.mark.asyncio def test_version(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient): version = llama_stack_client.inspect.version() assert version is not None diff --git a/tests/integration/providers/test_providers.py b/tests/integration/providers/test_providers.py index 8b153411c..fc65e2a10 100644 --- a/tests/integration/providers/test_providers.py +++ b/tests/integration/providers/test_providers.py @@ -4,14 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import pytest from llama_stack_client import LlamaStackClient from llama_stack import LlamaStackAsLibraryClient class TestProviders: - @pytest.mark.asyncio def test_providers(self, llama_stack_client: LlamaStackAsLibraryClient | LlamaStackClient): provider_list = llama_stack_client.providers.list() assert provider_list is not None diff --git a/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py b/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py index bf6077532..c32d6cd17 100644 --- a/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py +++ b/tests/integration/providers/utils/sqlstore/test_authorized_sqlstore.py @@ -88,7 +88,6 @@ async def cleanup_records(sql_store, table_name, record_ids): pass -@pytest.mark.asyncio @pytest.mark.parametrize("backend_config", BACKEND_CONFIGS) @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") async def test_authorized_store_attributes(mock_get_authenticated_user, authorized_store, request): @@ -183,7 +182,6 @@ async def test_authorized_store_attributes(mock_get_authenticated_user, authoriz await cleanup_records(authorized_store.sql_store, table_name, ["1", "2", "3", "4", "5", "6"]) -@pytest.mark.asyncio @pytest.mark.parametrize("backend_config", BACKEND_CONFIGS) @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") async def test_user_ownership_policy(mock_get_authenticated_user, authorized_store, request): diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index aedac0386..b5eb1217d 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -4,6 +4,17 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import pytest_socket + # We need to import the fixtures here so that pytest can find them # but ruff doesn't think they are used and removes the import. "noqa: F401" prevents them from being removed from .fixtures import cached_disk_dist_registry, disk_dist_registry, sqlite_kvstore # noqa: F401 + + +def pytest_runtest_setup(item): + """Setup for each test - check if network access should be allowed.""" + if "allow_network" in item.keywords: + pytest_socket.enable_socket() + else: + # Allowing Unix sockets is necessary for some tests that use local servers and mocks + pytest_socket.disable_socket(allow_unix_socket=True) diff --git a/tests/unit/distribution/routers/test_routing_tables.py b/tests/unit/distribution/routers/test_routing_tables.py index 0eeb68167..3ba042bd9 100644 --- a/tests/unit/distribution/routers/test_routing_tables.py +++ b/tests/unit/distribution/routers/test_routing_tables.py @@ -8,8 +8,6 @@ from unittest.mock import AsyncMock -import pytest - from llama_stack.apis.common.type_system import NumberType from llama_stack.apis.datasets.datasets import Dataset, DatasetPurpose, URIDataSource from llama_stack.apis.datatypes import Api @@ -119,7 +117,6 @@ class ToolGroupsImpl(Impl): ) -@pytest.mark.asyncio async def test_models_routing_table(cached_disk_dist_registry): table = ModelsRoutingTable({"test_provider": InferenceImpl()}, cached_disk_dist_registry, {}) await table.initialize() @@ -161,7 +158,6 @@ async def test_models_routing_table(cached_disk_dist_registry): assert len(openai_models.data) == 0 -@pytest.mark.asyncio async def test_shields_routing_table(cached_disk_dist_registry): table = ShieldsRoutingTable({"test_provider": SafetyImpl()}, cached_disk_dist_registry, {}) await table.initialize() @@ -177,7 +173,6 @@ async def test_shields_routing_table(cached_disk_dist_registry): assert "test-shield-2" in shield_ids -@pytest.mark.asyncio async def test_vectordbs_routing_table(cached_disk_dist_registry): table = VectorDBsRoutingTable({"test_provider": VectorDBImpl()}, cached_disk_dist_registry, {}) await table.initialize() @@ -233,7 +228,6 @@ async def test_datasets_routing_table(cached_disk_dist_registry): assert len(datasets.data) == 0 -@pytest.mark.asyncio async def test_scoring_functions_routing_table(cached_disk_dist_registry): table = ScoringFunctionsRoutingTable({"test_provider": ScoringFunctionsImpl()}, cached_disk_dist_registry, {}) await table.initialize() @@ -259,7 +253,6 @@ async def test_scoring_functions_routing_table(cached_disk_dist_registry): assert "test-scoring-fn-2" in scoring_fn_ids -@pytest.mark.asyncio async def test_benchmarks_routing_table(cached_disk_dist_registry): table = BenchmarksRoutingTable({"test_provider": BenchmarksImpl()}, cached_disk_dist_registry, {}) await table.initialize() @@ -277,7 +270,6 @@ async def test_benchmarks_routing_table(cached_disk_dist_registry): assert "test-benchmark" in benchmark_ids -@pytest.mark.asyncio async def test_tool_groups_routing_table(cached_disk_dist_registry): table = ToolGroupsRoutingTable({"test_provider": ToolGroupsImpl()}, cached_disk_dist_registry, {}) await table.initialize() diff --git a/tests/unit/distribution/test_context.py b/tests/unit/distribution/test_context.py index 84944bfe8..7914be51d 100644 --- a/tests/unit/distribution/test_context.py +++ b/tests/unit/distribution/test_context.py @@ -13,7 +13,6 @@ import pytest from llama_stack.distribution.utils.context import preserve_contexts_async_generator -@pytest.mark.asyncio async def test_preserve_contexts_with_exception(): # Create context variable context_var = ContextVar("exception_var", default="initial") @@ -41,7 +40,6 @@ async def test_preserve_contexts_with_exception(): context_var.reset(token) -@pytest.mark.asyncio async def test_preserve_contexts_empty_generator(): # Create context variable context_var = ContextVar("empty_var", default="initial") @@ -66,7 +64,6 @@ async def test_preserve_contexts_empty_generator(): context_var.reset(token) -@pytest.mark.asyncio async def test_preserve_contexts_across_event_loops(): """ Test that context variables are preserved across event loop boundaries with nested generators. diff --git a/tests/unit/files/test_files.py b/tests/unit/files/test_files.py index ef1dc9743..785077e91 100644 --- a/tests/unit/files/test_files.py +++ b/tests/unit/files/test_files.py @@ -6,7 +6,6 @@ import pytest -import pytest_asyncio from llama_stack.apis.common.responses import Order from llama_stack.apis.files import OpenAIFilePurpose @@ -29,7 +28,7 @@ class MockUploadFile: return self.content -@pytest_asyncio.fixture +@pytest.fixture async def files_provider(tmp_path): """Create a files provider with temporary storage for testing.""" storage_dir = tmp_path / "files" @@ -68,7 +67,6 @@ def large_file(): class TestOpenAIFilesAPI: """Test suite for OpenAI Files API endpoints.""" - @pytest.mark.asyncio async def test_upload_file_success(self, files_provider, sample_text_file): """Test successful file upload.""" # Upload file @@ -82,7 +80,6 @@ class TestOpenAIFilesAPI: assert result.created_at > 0 assert result.expires_at > result.created_at - @pytest.mark.asyncio async def test_upload_different_purposes(self, files_provider, sample_text_file): """Test uploading files with different purposes.""" purposes = list(OpenAIFilePurpose) @@ -93,7 +90,6 @@ class TestOpenAIFilesAPI: uploaded_files.append(result) assert result.purpose == purpose - @pytest.mark.asyncio async def test_upload_different_file_types(self, files_provider, sample_text_file, sample_json_file, large_file): """Test uploading different types and sizes of files.""" files_to_test = [ @@ -107,7 +103,6 @@ class TestOpenAIFilesAPI: assert result.filename == expected_filename assert result.bytes == len(file_obj.content) - @pytest.mark.asyncio async def test_list_files_empty(self, files_provider): """Test listing files when no files exist.""" result = await files_provider.openai_list_files() @@ -117,7 +112,6 @@ class TestOpenAIFilesAPI: assert result.first_id == "" assert result.last_id == "" - @pytest.mark.asyncio async def test_list_files_with_content(self, files_provider, sample_text_file, sample_json_file): """Test listing files when files exist.""" # Upload multiple files @@ -132,7 +126,6 @@ class TestOpenAIFilesAPI: assert file1.id in file_ids assert file2.id in file_ids - @pytest.mark.asyncio async def test_list_files_with_purpose_filter(self, files_provider, sample_text_file): """Test listing files with purpose filtering.""" # Upload file with specific purpose @@ -146,7 +139,6 @@ class TestOpenAIFilesAPI: assert result.data[0].id == uploaded_file.id assert result.data[0].purpose == OpenAIFilePurpose.ASSISTANTS - @pytest.mark.asyncio async def test_list_files_with_limit(self, files_provider, sample_text_file): """Test listing files with limit parameter.""" # Upload multiple files @@ -157,7 +149,6 @@ class TestOpenAIFilesAPI: result = await files_provider.openai_list_files(limit=3) assert len(result.data) == 3 - @pytest.mark.asyncio async def test_list_files_with_order(self, files_provider, sample_text_file): """Test listing files with different order.""" # Upload multiple files @@ -178,7 +169,6 @@ class TestOpenAIFilesAPI: # Oldest should be first assert result_asc.data[0].created_at <= result_asc.data[1].created_at <= result_asc.data[2].created_at - @pytest.mark.asyncio async def test_retrieve_file_success(self, files_provider, sample_text_file): """Test successful file retrieval.""" # Upload file @@ -197,13 +187,11 @@ class TestOpenAIFilesAPI: assert retrieved_file.created_at == uploaded_file.created_at assert retrieved_file.expires_at == uploaded_file.expires_at - @pytest.mark.asyncio async def test_retrieve_file_not_found(self, files_provider): """Test retrieving a non-existent file.""" with pytest.raises(ValueError, match="File with id file-nonexistent not found"): await files_provider.openai_retrieve_file("file-nonexistent") - @pytest.mark.asyncio async def test_retrieve_file_content_success(self, files_provider, sample_text_file): """Test successful file content retrieval.""" # Upload file @@ -217,13 +205,11 @@ class TestOpenAIFilesAPI: # Verify content assert content.body == sample_text_file.content - @pytest.mark.asyncio async def test_retrieve_file_content_not_found(self, files_provider): """Test retrieving content of a non-existent file.""" with pytest.raises(ValueError, match="File with id file-nonexistent not found"): await files_provider.openai_retrieve_file_content("file-nonexistent") - @pytest.mark.asyncio async def test_delete_file_success(self, files_provider, sample_text_file): """Test successful file deletion.""" # Upload file @@ -245,13 +231,11 @@ class TestOpenAIFilesAPI: with pytest.raises(ValueError, match=f"File with id {uploaded_file.id} not found"): await files_provider.openai_retrieve_file(uploaded_file.id) - @pytest.mark.asyncio async def test_delete_file_not_found(self, files_provider): """Test deleting a non-existent file.""" with pytest.raises(ValueError, match="File with id file-nonexistent not found"): await files_provider.openai_delete_file("file-nonexistent") - @pytest.mark.asyncio async def test_file_persistence_across_operations(self, files_provider, sample_text_file): """Test that files persist correctly across multiple operations.""" # Upload file @@ -279,7 +263,6 @@ class TestOpenAIFilesAPI: files_list = await files_provider.openai_list_files() assert len(files_list.data) == 0 - @pytest.mark.asyncio async def test_multiple_files_operations(self, files_provider, sample_text_file, sample_json_file): """Test operations with multiple files.""" # Upload multiple files @@ -302,7 +285,6 @@ class TestOpenAIFilesAPI: content = await files_provider.openai_retrieve_file_content(file2.id) assert content.body == sample_json_file.content - @pytest.mark.asyncio async def test_file_id_uniqueness(self, files_provider, sample_text_file): """Test that each uploaded file gets a unique ID.""" file_ids = set() @@ -316,7 +298,6 @@ class TestOpenAIFilesAPI: file_ids.add(uploaded_file.id) assert uploaded_file.id.startswith("file-") - @pytest.mark.asyncio async def test_file_no_filename_handling(self, files_provider): """Test handling files with no filename.""" file_without_name = MockUploadFile(b"content", None) # No filename @@ -327,7 +308,6 @@ class TestOpenAIFilesAPI: assert uploaded_file.filename == "uploaded_file" # Default filename - @pytest.mark.asyncio async def test_after_pagination_works(self, files_provider, sample_text_file): """Test that 'after' pagination works correctly.""" # Upload multiple files to test pagination diff --git a/tests/unit/fixtures.py b/tests/unit/fixtures.py index 4e50c5e08..7174d2e78 100644 --- a/tests/unit/fixtures.py +++ b/tests/unit/fixtures.py @@ -4,14 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import pytest_asyncio +import pytest from llama_stack.distribution.store.registry import CachedDiskDistributionRegistry, DiskDistributionRegistry from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig from llama_stack.providers.utils.kvstore.sqlite import SqliteKVStoreImpl -@pytest_asyncio.fixture(scope="function") +@pytest.fixture(scope="function") async def sqlite_kvstore(tmp_path): db_path = tmp_path / "test_kv.db" kvstore_config = SqliteKVStoreConfig(db_path=db_path.as_posix()) @@ -20,14 +20,14 @@ async def sqlite_kvstore(tmp_path): yield kvstore -@pytest_asyncio.fixture(scope="function") +@pytest.fixture(scope="function") async def disk_dist_registry(sqlite_kvstore): registry = DiskDistributionRegistry(sqlite_kvstore) await registry.initialize() yield registry -@pytest_asyncio.fixture(scope="function") +@pytest.fixture(scope="function") async def cached_disk_dist_registry(sqlite_kvstore): registry = CachedDiskDistributionRegistry(sqlite_kvstore) await registry.initialize() diff --git a/tests/unit/providers/agent/test_meta_reference_agent.py b/tests/unit/providers/agent/test_meta_reference_agent.py index 7a7d52892..c06d9ab0e 100644 --- a/tests/unit/providers/agent/test_meta_reference_agent.py +++ b/tests/unit/providers/agent/test_meta_reference_agent.py @@ -8,7 +8,6 @@ from datetime import datetime from unittest.mock import AsyncMock import pytest -import pytest_asyncio from llama_stack.apis.agents import ( Agent, @@ -50,7 +49,7 @@ def config(tmp_path): ) -@pytest_asyncio.fixture +@pytest.fixture async def agents_impl(config, mock_apis): impl = MetaReferenceAgentsImpl( config, @@ -117,7 +116,6 @@ def sample_agent_config(): ) -@pytest.mark.asyncio async def test_create_agent(agents_impl, sample_agent_config): response = await agents_impl.create_agent(sample_agent_config) @@ -132,7 +130,6 @@ async def test_create_agent(agents_impl, sample_agent_config): assert isinstance(agent_info.created_at, datetime) -@pytest.mark.asyncio async def test_get_agent(agents_impl, sample_agent_config): create_response = await agents_impl.create_agent(sample_agent_config) agent_id = create_response.agent_id @@ -146,7 +143,6 @@ async def test_get_agent(agents_impl, sample_agent_config): assert isinstance(agent.created_at, datetime) -@pytest.mark.asyncio async def test_list_agents(agents_impl, sample_agent_config): agent1_response = await agents_impl.create_agent(sample_agent_config) agent2_response = await agents_impl.create_agent(sample_agent_config) @@ -160,7 +156,6 @@ async def test_list_agents(agents_impl, sample_agent_config): assert agent2_response.agent_id in agent_ids -@pytest.mark.asyncio @pytest.mark.parametrize("enable_session_persistence", [True, False]) async def test_create_agent_session_persistence(agents_impl, sample_agent_config, enable_session_persistence): # Create an agent with specified persistence setting @@ -188,7 +183,6 @@ async def test_create_agent_session_persistence(agents_impl, sample_agent_config await agents_impl.get_agents_session(agent_id, session_response.session_id) -@pytest.mark.asyncio @pytest.mark.parametrize("enable_session_persistence", [True, False]) async def test_list_agent_sessions_persistence(agents_impl, sample_agent_config, enable_session_persistence): # Create an agent with specified persistence setting @@ -221,7 +215,6 @@ async def test_list_agent_sessions_persistence(agents_impl, sample_agent_config, assert session2.session_id in {s["session_id"] for s in sessions.data} -@pytest.mark.asyncio async def test_delete_agent(agents_impl, sample_agent_config): # Create an agent response = await agents_impl.create_agent(sample_agent_config) diff --git a/tests/unit/providers/agents/meta_reference/test_openai_responses.py b/tests/unit/providers/agents/meta_reference/test_openai_responses.py index 0d1ef8eca..6485e3512 100644 --- a/tests/unit/providers/agents/meta_reference/test_openai_responses.py +++ b/tests/unit/providers/agents/meta_reference/test_openai_responses.py @@ -122,7 +122,6 @@ async def fake_stream(fixture: str = "simple_chat_completion.yaml"): ) -@pytest.mark.asyncio async def test_create_openai_response_with_string_input(openai_responses_impl, mock_inference_api): """Test creating an OpenAI response with a simple string input.""" # Setup @@ -155,7 +154,6 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m assert result.output[0].content[0].text == "Dublin" -@pytest.mark.asyncio async def test_create_openai_response_with_string_input_with_tools(openai_responses_impl, mock_inference_api): """Test creating an OpenAI response with a simple string input and tools.""" # Setup @@ -224,7 +222,6 @@ async def test_create_openai_response_with_string_input_with_tools(openai_respon assert result.output[1].content[0].annotations == [] -@pytest.mark.asyncio async def test_create_openai_response_with_tool_call_type_none(openai_responses_impl, mock_inference_api): """Test creating an OpenAI response with a tool call response that has a type of None.""" # Setup @@ -294,7 +291,6 @@ async def test_create_openai_response_with_tool_call_type_none(openai_responses_ assert chunks[1].response.output[0].name == "get_weather" -@pytest.mark.asyncio async def test_create_openai_response_with_multiple_messages(openai_responses_impl, mock_inference_api): """Test creating an OpenAI response with multiple messages.""" # Setup @@ -340,7 +336,6 @@ async def test_create_openai_response_with_multiple_messages(openai_responses_im assert isinstance(inference_messages[i], OpenAIDeveloperMessageParam) -@pytest.mark.asyncio async def test_prepend_previous_response_none(openai_responses_impl): """Test prepending no previous response to a new response.""" @@ -348,7 +343,6 @@ async def test_prepend_previous_response_none(openai_responses_impl): assert input == "fake_input" -@pytest.mark.asyncio async def test_prepend_previous_response_basic(openai_responses_impl, mock_responses_store): """Test prepending a basic previous response to a new response.""" @@ -388,7 +382,6 @@ async def test_prepend_previous_response_basic(openai_responses_impl, mock_respo assert input[2].content == "fake_input" -@pytest.mark.asyncio async def test_prepend_previous_response_web_search(openai_responses_impl, mock_responses_store): """Test prepending a web search previous response to a new response.""" input_item_message = OpenAIResponseMessage( @@ -434,7 +427,6 @@ async def test_prepend_previous_response_web_search(openai_responses_impl, mock_ assert input[3].content == "fake_input" -@pytest.mark.asyncio async def test_create_openai_response_with_instructions(openai_responses_impl, mock_inference_api): # Setup input_text = "What is the capital of Ireland?" @@ -463,7 +455,6 @@ async def test_create_openai_response_with_instructions(openai_responses_impl, m assert sent_messages[1].content == input_text -@pytest.mark.asyncio async def test_create_openai_response_with_instructions_and_multiple_messages( openai_responses_impl, mock_inference_api ): @@ -508,7 +499,6 @@ async def test_create_openai_response_with_instructions_and_multiple_messages( assert sent_messages[3].content == "Which is the largest?" -@pytest.mark.asyncio async def test_create_openai_response_with_instructions_and_previous_response( openai_responses_impl, mock_responses_store, mock_inference_api ): @@ -565,7 +555,6 @@ async def test_create_openai_response_with_instructions_and_previous_response( assert sent_messages[3].content == "Which is the largest?" -@pytest.mark.asyncio async def test_list_openai_response_input_items_delegation(openai_responses_impl, mock_responses_store): """Test that list_openai_response_input_items properly delegates to responses_store with correct parameters.""" # Setup @@ -601,7 +590,6 @@ async def test_list_openai_response_input_items_delegation(openai_responses_impl assert result.data[0].id == "msg_123" -@pytest.mark.asyncio async def test_responses_store_list_input_items_logic(): """Test ResponsesStore list_response_input_items logic - mocks get_response_object to test actual ordering/limiting.""" @@ -680,7 +668,6 @@ async def test_responses_store_list_input_items_logic(): assert len(result.data) == 0 # Should return no items -@pytest.mark.asyncio async def test_store_response_uses_rehydrated_input_with_previous_response( openai_responses_impl, mock_responses_store, mock_inference_api ): @@ -747,7 +734,6 @@ async def test_store_response_uses_rehydrated_input_with_previous_response( assert result.status == "completed" -@pytest.mark.asyncio @pytest.mark.parametrize( "text_format, response_format", [ @@ -787,7 +773,6 @@ async def test_create_openai_response_with_text_format( assert first_call.kwargs["response_format"] == response_format -@pytest.mark.asyncio async def test_create_openai_response_with_invalid_text_format(openai_responses_impl, mock_inference_api): """Test creating an OpenAI response with an invalid text format.""" # Setup diff --git a/tests/unit/providers/agents/test_persistence_access_control.py b/tests/unit/providers/agents/test_persistence_access_control.py index 656d1e53c..26001fcf1 100644 --- a/tests/unit/providers/agents/test_persistence_access_control.py +++ b/tests/unit/providers/agents/test_persistence_access_control.py @@ -9,7 +9,6 @@ from datetime import datetime from unittest.mock import patch import pytest -import pytest_asyncio from llama_stack.apis.agents import Turn from llama_stack.apis.inference import CompletionMessage, StopReason @@ -17,13 +16,12 @@ from llama_stack.distribution.datatypes import User from llama_stack.providers.inline.agents.meta_reference.persistence import AgentPersistence, AgentSessionInfo -@pytest_asyncio.fixture +@pytest.fixture async def test_setup(sqlite_kvstore): agent_persistence = AgentPersistence(agent_id="test_agent", kvstore=sqlite_kvstore, policy={}) yield agent_persistence -@pytest.mark.asyncio @patch("llama_stack.providers.inline.agents.meta_reference.persistence.get_authenticated_user") async def test_session_creation_with_access_attributes(mock_get_authenticated_user, test_setup): agent_persistence = test_setup @@ -44,7 +42,6 @@ async def test_session_creation_with_access_attributes(mock_get_authenticated_us assert session_info.owner.attributes["teams"] == ["ai-team"] -@pytest.mark.asyncio @patch("llama_stack.providers.inline.agents.meta_reference.persistence.get_authenticated_user") async def test_session_access_control(mock_get_authenticated_user, test_setup): agent_persistence = test_setup @@ -79,7 +76,6 @@ async def test_session_access_control(mock_get_authenticated_user, test_setup): assert retrieved_session is None -@pytest.mark.asyncio @patch("llama_stack.providers.inline.agents.meta_reference.persistence.get_authenticated_user") async def test_turn_access_control(mock_get_authenticated_user, test_setup): agent_persistence = test_setup @@ -133,7 +129,6 @@ async def test_turn_access_control(mock_get_authenticated_user, test_setup): await agent_persistence.get_session_turns(session_id) -@pytest.mark.asyncio @patch("llama_stack.providers.inline.agents.meta_reference.persistence.get_authenticated_user") async def test_tool_call_and_infer_iters_access_control(mock_get_authenticated_user, test_setup): agent_persistence = test_setup diff --git a/tests/unit/providers/inference/test_inference_client_caching.py b/tests/unit/providers/inference/test_inference_client_caching.py new file mode 100644 index 000000000..c9a931d47 --- /dev/null +++ b/tests/unit/providers/inference/test_inference_client_caching.py @@ -0,0 +1,73 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import json +from unittest.mock import MagicMock + +from llama_stack.distribution.request_headers import request_provider_data_context +from llama_stack.providers.remote.inference.groq.config import GroqConfig +from llama_stack.providers.remote.inference.groq.groq import GroqInferenceAdapter +from llama_stack.providers.remote.inference.openai.config import OpenAIConfig +from llama_stack.providers.remote.inference.openai.openai import OpenAIInferenceAdapter +from llama_stack.providers.remote.inference.together.config import TogetherImplConfig +from llama_stack.providers.remote.inference.together.together import TogetherInferenceAdapter + + +def test_groq_provider_openai_client_caching(): + """Ensure the Groq provider does not cache api keys across client requests""" + + config = GroqConfig() + inference_adapter = GroqInferenceAdapter(config) + + inference_adapter.__provider_spec__ = MagicMock() + inference_adapter.__provider_spec__.provider_data_validator = ( + "llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator" + ) + + for api_key in ["test1", "test2"]: + with request_provider_data_context( + {"x-llamastack-provider-data": json.dumps({inference_adapter.provider_data_api_key_field: api_key})} + ): + openai_client = inference_adapter._get_openai_client() + assert openai_client.api_key == api_key + + +def test_openai_provider_openai_client_caching(): + """Ensure the OpenAI provider does not cache api keys across client requests""" + + config = OpenAIConfig() + inference_adapter = OpenAIInferenceAdapter(config) + + inference_adapter.__provider_spec__ = MagicMock() + inference_adapter.__provider_spec__.provider_data_validator = ( + "llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator" + ) + + for api_key in ["test1", "test2"]: + with request_provider_data_context( + {"x-llamastack-provider-data": json.dumps({inference_adapter.provider_data_api_key_field: api_key})} + ): + openai_client = inference_adapter._get_openai_client() + assert openai_client.api_key == api_key + + +def test_together_provider_openai_client_caching(): + """Ensure the Together provider does not cache api keys across client requests""" + + config = TogetherImplConfig() + inference_adapter = TogetherInferenceAdapter(config) + + inference_adapter.__provider_spec__ = MagicMock() + inference_adapter.__provider_spec__.provider_data_validator = ( + "llama_stack.providers.remote.inference.together.TogetherProviderDataValidator" + ) + + for api_key in ["test1", "test2"]: + with request_provider_data_context({"x-llamastack-provider-data": json.dumps({"together_api_key": api_key})}): + together_client = inference_adapter._get_client() + assert together_client.client.api_key == api_key + openai_client = inference_adapter._get_openai_client() + assert openai_client.api_key == api_key diff --git a/tests/unit/providers/inference/test_remote_vllm.py b/tests/unit/providers/inference/test_remote_vllm.py index eaa9b40da..5c2ad03ab 100644 --- a/tests/unit/providers/inference/test_remote_vllm.py +++ b/tests/unit/providers/inference/test_remote_vllm.py @@ -14,7 +14,6 @@ from typing import Any from unittest.mock import AsyncMock, MagicMock, patch import pytest -import pytest_asyncio from openai.types.chat.chat_completion_chunk import ( ChatCompletionChunk as OpenAIChatCompletionChunk, ) @@ -103,7 +102,7 @@ def mock_openai_models_list(): yield mock_list -@pytest_asyncio.fixture(scope="module") +@pytest.fixture(scope="module") async def vllm_inference_adapter(): config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345") inference_adapter = VLLMInferenceAdapter(config) @@ -112,7 +111,6 @@ async def vllm_inference_adapter(): return inference_adapter -@pytest.mark.asyncio async def test_register_model_checks_vllm(mock_openai_models_list, vllm_inference_adapter): async def mock_openai_models(): yield OpenAIModel(id="foo", created=1, object="model", owned_by="test") @@ -125,7 +123,6 @@ async def test_register_model_checks_vllm(mock_openai_models_list, vllm_inferenc mock_openai_models_list.assert_called() -@pytest.mark.asyncio async def test_old_vllm_tool_choice(vllm_inference_adapter): """ Test that we set tool_choice to none when no tools are in use @@ -149,7 +146,6 @@ async def test_old_vllm_tool_choice(vllm_inference_adapter): assert request.tool_config.tool_choice == ToolChoice.none -@pytest.mark.asyncio async def test_tool_call_response(vllm_inference_adapter): """Verify that tool call arguments from a CompletionMessage are correctly converted into the expected JSON format.""" @@ -192,7 +188,6 @@ async def test_tool_call_response(vllm_inference_adapter): ] -@pytest.mark.asyncio async def test_tool_call_delta_empty_tool_call_buf(): """ Test that we don't generate extra chunks when processing a @@ -222,7 +217,6 @@ async def test_tool_call_delta_empty_tool_call_buf(): assert chunks[1].event.stop_reason == StopReason.end_of_turn -@pytest.mark.asyncio async def test_tool_call_delta_streaming_arguments_dict(): async def mock_stream(): mock_chunk_1 = OpenAIChatCompletionChunk( @@ -297,7 +291,6 @@ async def test_tool_call_delta_streaming_arguments_dict(): assert chunks[2].event.event_type.value == "complete" -@pytest.mark.asyncio async def test_multiple_tool_calls(): async def mock_stream(): mock_chunk_1 = OpenAIChatCompletionChunk( @@ -376,7 +369,6 @@ async def test_multiple_tool_calls(): assert chunks[3].event.event_type.value == "complete" -@pytest.mark.asyncio async def test_process_vllm_chat_completion_stream_response_no_choices(): """ Test that we don't error out when vLLM returns no choices for a @@ -401,6 +393,7 @@ async def test_process_vllm_chat_completion_stream_response_no_choices(): assert chunks[0].event.event_type.value == "start" +@pytest.mark.allow_network def test_chat_completion_doesnt_block_event_loop(caplog): loop = asyncio.new_event_loop() loop.set_debug(True) @@ -453,7 +446,6 @@ def test_chat_completion_doesnt_block_event_loop(caplog): assert not asyncio_warnings -@pytest.mark.asyncio async def test_get_params_empty_tools(vllm_inference_adapter): request = ChatCompletionRequest( tools=[], @@ -464,7 +456,6 @@ async def test_get_params_empty_tools(vllm_inference_adapter): assert "tools" not in params -@pytest.mark.asyncio async def test_process_vllm_chat_completion_stream_response_tool_call_args_last_chunk(): """ Tests the edge case where the model returns the arguments for the tool call in the same chunk that @@ -543,7 +534,6 @@ async def test_process_vllm_chat_completion_stream_response_tool_call_args_last_ assert chunks[-2].event.delta.tool_call.arguments == mock_tool_arguments -@pytest.mark.asyncio async def test_process_vllm_chat_completion_stream_response_no_finish_reason(): """ Tests the edge case where the model requests a tool call and stays idle without explicitly providing the @@ -596,7 +586,6 @@ async def test_process_vllm_chat_completion_stream_response_no_finish_reason(): assert chunks[-2].event.delta.tool_call.arguments == mock_tool_arguments -@pytest.mark.asyncio async def test_process_vllm_chat_completion_stream_response_tool_without_args(): """ Tests the edge case where no arguments are provided for the tool call. @@ -645,7 +634,6 @@ async def test_process_vllm_chat_completion_stream_response_tool_without_args(): assert chunks[-2].event.delta.tool_call.arguments == {} -@pytest.mark.asyncio async def test_health_status_success(vllm_inference_adapter): """ Test the health method of VLLM InferenceAdapter when the connection is successful. @@ -679,7 +667,6 @@ async def test_health_status_success(vllm_inference_adapter): mock_models.list.assert_called_once() -@pytest.mark.asyncio async def test_health_status_failure(vllm_inference_adapter): """ Test the health method of VLLM InferenceAdapter when the connection fails. diff --git a/tests/unit/providers/utils/inference/test_openai_compat.py b/tests/unit/providers/utils/inference/test_openai_compat.py index 3598e4810..f57f6c9b3 100644 --- a/tests/unit/providers/utils/inference/test_openai_compat.py +++ b/tests/unit/providers/utils/inference/test_openai_compat.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import pytest from llama_stack.apis.common.content_types import TextContentItem from llama_stack.apis.inference import ( @@ -23,7 +22,6 @@ from llama_stack.providers.utils.inference.openai_compat import ( ) -@pytest.mark.asyncio async def test_convert_message_to_openai_dict(): message = UserMessage(content=[TextContentItem(text="Hello, world!")], role="user") assert await convert_message_to_openai_dict(message) == { @@ -33,7 +31,6 @@ async def test_convert_message_to_openai_dict(): # Test convert_message_to_openai_dict with a tool call -@pytest.mark.asyncio async def test_convert_message_to_openai_dict_with_tool_call(): message = CompletionMessage( content="", @@ -54,7 +51,6 @@ async def test_convert_message_to_openai_dict_with_tool_call(): } -@pytest.mark.asyncio async def test_convert_message_to_openai_dict_with_builtin_tool_call(): message = CompletionMessage( content="", @@ -80,7 +76,6 @@ async def test_convert_message_to_openai_dict_with_builtin_tool_call(): } -@pytest.mark.asyncio async def test_openai_messages_to_messages_with_content_str(): openai_messages = [ OpenAISystemMessageParam(content="system message"), @@ -98,7 +93,6 @@ async def test_openai_messages_to_messages_with_content_str(): assert llama_messages[2].content == "assistant message" -@pytest.mark.asyncio async def test_openai_messages_to_messages_with_content_list(): openai_messages = [ OpenAISystemMessageParam(content=[OpenAIChatCompletionContentPartTextParam(text="system message")]), diff --git a/tests/unit/providers/utils/memory/test_vector_store.py b/tests/unit/providers/utils/memory/test_vector_store.py index 220c21994..90b229262 100644 --- a/tests/unit/providers/utils/memory/test_vector_store.py +++ b/tests/unit/providers/utils/memory/test_vector_store.py @@ -13,7 +13,6 @@ from llama_stack.apis.tools import RAGDocument from llama_stack.providers.utils.memory.vector_store import content_from_data_and_mime_type, content_from_doc -@pytest.mark.asyncio async def test_content_from_doc_with_url(): """Test extracting content from RAGDocument with URL content.""" mock_url = URL(uri="https://example.com") @@ -33,7 +32,6 @@ async def test_content_from_doc_with_url(): mock_instance.get.assert_called_once_with(mock_url.uri) -@pytest.mark.asyncio async def test_content_from_doc_with_pdf_url(): """Test extracting content from RAGDocument with URL pointing to a PDF.""" mock_url = URL(uri="https://example.com/document.pdf") @@ -58,7 +56,6 @@ async def test_content_from_doc_with_pdf_url(): mock_parse_pdf.assert_called_once_with(b"PDF binary data") -@pytest.mark.asyncio async def test_content_from_doc_with_data_url(): """Test extracting content from RAGDocument with data URL content.""" data_url = "data:text/plain;base64,SGVsbG8gV29ybGQ=" # "Hello World" base64 encoded @@ -74,7 +71,6 @@ async def test_content_from_doc_with_data_url(): mock_content_from_data.assert_called_once_with(data_url) -@pytest.mark.asyncio async def test_content_from_doc_with_string(): """Test extracting content from RAGDocument with string content.""" content_string = "This is plain text content" @@ -85,7 +81,6 @@ async def test_content_from_doc_with_string(): assert result == content_string -@pytest.mark.asyncio async def test_content_from_doc_with_string_url(): """Test extracting content from RAGDocument with string URL content.""" url_string = "https://example.com" @@ -105,7 +100,6 @@ async def test_content_from_doc_with_string_url(): mock_instance.get.assert_called_once_with(url_string) -@pytest.mark.asyncio async def test_content_from_doc_with_string_pdf_url(): """Test extracting content from RAGDocument with string URL pointing to a PDF.""" url_string = "https://example.com/document.pdf" @@ -130,7 +124,6 @@ async def test_content_from_doc_with_string_pdf_url(): mock_parse_pdf.assert_called_once_with(b"PDF binary data") -@pytest.mark.asyncio async def test_content_from_doc_with_interleaved_content(): """Test extracting content from RAGDocument with InterleavedContent (the new case added in the commit).""" interleaved_content = [TextContentItem(text="First item"), TextContentItem(text="Second item")] diff --git a/tests/unit/providers/utils/test_model_registry.py b/tests/unit/providers/utils/test_model_registry.py index 10fa1e075..e11f95d49 100644 --- a/tests/unit/providers/utils/test_model_registry.py +++ b/tests/unit/providers/utils/test_model_registry.py @@ -87,18 +87,15 @@ def helper(known_provider_model: ProviderModelEntry, known_provider_model2: Prov return ModelRegistryHelper([known_provider_model, known_provider_model2]) -@pytest.mark.asyncio async def test_lookup_unknown_model(helper: ModelRegistryHelper, unknown_model: Model) -> None: assert helper.get_provider_model_id(unknown_model.model_id) is None -@pytest.mark.asyncio async def test_register_unknown_provider_model(helper: ModelRegistryHelper, unknown_model: Model) -> None: with pytest.raises(ValueError): await helper.register_model(unknown_model) -@pytest.mark.asyncio async def test_register_model(helper: ModelRegistryHelper, known_model: Model) -> None: model = Model( provider_id=known_model.provider_id, @@ -110,7 +107,6 @@ async def test_register_model(helper: ModelRegistryHelper, known_model: Model) - assert helper.get_provider_model_id(model.model_id) == model.provider_resource_id -@pytest.mark.asyncio async def test_register_model_from_alias(helper: ModelRegistryHelper, known_model: Model) -> None: model = Model( provider_id=known_model.provider_id, @@ -122,13 +118,11 @@ async def test_register_model_from_alias(helper: ModelRegistryHelper, known_mode assert helper.get_provider_model_id(model.model_id) == known_model.provider_resource_id -@pytest.mark.asyncio async def test_register_model_existing(helper: ModelRegistryHelper, known_model: Model) -> None: await helper.register_model(known_model) assert helper.get_provider_model_id(known_model.model_id) == known_model.provider_resource_id -@pytest.mark.asyncio async def test_register_model_existing_different( helper: ModelRegistryHelper, known_model: Model, known_model2: Model ) -> None: @@ -137,7 +131,6 @@ async def test_register_model_existing_different( await helper.register_model(known_model) -@pytest.mark.asyncio async def test_unregister_model(helper: ModelRegistryHelper, known_model: Model) -> None: await helper.register_model(known_model) # duplicate entry assert helper.get_provider_model_id(known_model.model_id) == known_model.provider_model_id @@ -145,18 +138,15 @@ async def test_unregister_model(helper: ModelRegistryHelper, known_model: Model) assert helper.get_provider_model_id(known_model.model_id) is None -@pytest.mark.asyncio async def test_unregister_unknown_model(helper: ModelRegistryHelper, unknown_model: Model) -> None: with pytest.raises(ValueError): await helper.unregister_model(unknown_model.model_id) -@pytest.mark.asyncio async def test_register_model_during_init(helper: ModelRegistryHelper, known_model: Model) -> None: assert helper.get_provider_model_id(known_model.provider_resource_id) == known_model.provider_model_id -@pytest.mark.asyncio async def test_unregister_model_during_init(helper: ModelRegistryHelper, known_model: Model) -> None: assert helper.get_provider_model_id(known_model.provider_resource_id) == known_model.provider_model_id await helper.unregister_model(known_model.provider_resource_id) diff --git a/tests/unit/providers/utils/test_scheduler.py b/tests/unit/providers/utils/test_scheduler.py index 25b4935de..e5ee74bfa 100644 --- a/tests/unit/providers/utils/test_scheduler.py +++ b/tests/unit/providers/utils/test_scheduler.py @@ -11,7 +11,6 @@ import pytest from llama_stack.providers.utils.scheduler import JobStatus, Scheduler -@pytest.mark.asyncio async def test_scheduler_unknown_backend(): with pytest.raises(ValueError): Scheduler(backend="unknown") @@ -26,7 +25,6 @@ async def wait_for_job_completed(sched: Scheduler, job_id: str) -> None: raise TimeoutError(f"Job {job_id} did not complete in time.") -@pytest.mark.asyncio async def test_scheduler_naive(): sched = Scheduler() @@ -87,7 +85,6 @@ async def test_scheduler_naive(): assert job.logs[0][0] < job.logs[1][0] -@pytest.mark.asyncio async def test_scheduler_naive_handler_raises(): sched = Scheduler() diff --git a/tests/unit/providers/vector_io/test_faiss.py b/tests/unit/providers/vector_io/test_faiss.py index 8348b84e3..90108d7a0 100644 --- a/tests/unit/providers/vector_io/test_faiss.py +++ b/tests/unit/providers/vector_io/test_faiss.py @@ -9,7 +9,6 @@ from unittest.mock import AsyncMock, MagicMock, patch import numpy as np import pytest -import pytest_asyncio from llama_stack.apis.files import Files from llama_stack.apis.inference import EmbeddingsResponse, Inference @@ -91,13 +90,13 @@ def faiss_config(): return config -@pytest_asyncio.fixture +@pytest.fixture async def faiss_index(embedding_dimension): index = await FaissIndex.create(dimension=embedding_dimension) yield index -@pytest_asyncio.fixture +@pytest.fixture async def faiss_adapter(faiss_config, mock_inference_api, mock_files_api) -> FaissVectorIOAdapter: # Create the adapter adapter = FaissVectorIOAdapter(config=faiss_config, inference_api=mock_inference_api, files_api=mock_files_api) @@ -113,7 +112,6 @@ async def faiss_adapter(faiss_config, mock_inference_api, mock_files_api) -> Fai yield adapter -@pytest.mark.asyncio async def test_faiss_query_vector_returns_infinity_when_query_and_embedding_are_identical( faiss_index, sample_chunks, sample_embeddings, embedding_dimension ): @@ -136,7 +134,6 @@ async def test_faiss_query_vector_returns_infinity_when_query_and_embedding_are_ assert response.chunks[1] == sample_chunks[1] -@pytest.mark.asyncio async def test_health_success(): """Test that the health check returns OK status when faiss is working correctly.""" # Create a fresh instance of FaissVectorIOAdapter for testing @@ -160,7 +157,6 @@ async def test_health_success(): mock_index_flat.assert_called_once_with(128) # VECTOR_DIMENSION is 128 -@pytest.mark.asyncio async def test_health_failure(): """Test that the health check returns ERROR status when faiss encounters an error.""" # Create a fresh instance of FaissVectorIOAdapter for testing diff --git a/tests/unit/providers/vector_io/test_qdrant.py b/tests/unit/providers/vector_io/test_qdrant.py index 6902c8850..d3ffe711c 100644 --- a/tests/unit/providers/vector_io/test_qdrant.py +++ b/tests/unit/providers/vector_io/test_qdrant.py @@ -10,7 +10,6 @@ from typing import Any from unittest.mock import AsyncMock, MagicMock, patch import pytest -import pytest_asyncio from llama_stack.apis.inference import EmbeddingsResponse, Inference from llama_stack.apis.vector_io import ( @@ -68,7 +67,7 @@ def mock_api_service(sample_embeddings): return mock_api_service -@pytest_asyncio.fixture +@pytest.fixture async def qdrant_adapter(qdrant_config, mock_vector_db_store, mock_api_service, loop) -> QdrantVectorIOAdapter: adapter = QdrantVectorIOAdapter(config=qdrant_config, inference_api=mock_api_service) adapter.vector_db_store = mock_vector_db_store @@ -80,7 +79,6 @@ async def qdrant_adapter(qdrant_config, mock_vector_db_store, mock_api_service, __QUERY = "Sample query" -@pytest.mark.asyncio @pytest.mark.parametrize("max_query_chunks, expected_chunks", [(2, 2), (100, 60)]) async def test_qdrant_adapter_returns_expected_chunks( qdrant_adapter: QdrantVectorIOAdapter, @@ -111,7 +109,6 @@ def _prepare_for_json(value: Any) -> str: @patch("llama_stack.providers.utils.telemetry.trace_protocol._prepare_for_json", new=_prepare_for_json) -@pytest.mark.asyncio async def test_qdrant_register_and_unregister_vector_db( qdrant_adapter: QdrantVectorIOAdapter, mock_vector_db, diff --git a/tests/unit/providers/vector_io/test_sqlite_vec.py b/tests/unit/providers/vector_io/test_sqlite_vec.py index 8579c31bb..a61eeeeca 100644 --- a/tests/unit/providers/vector_io/test_sqlite_vec.py +++ b/tests/unit/providers/vector_io/test_sqlite_vec.py @@ -8,7 +8,6 @@ import asyncio import numpy as np import pytest -import pytest_asyncio from llama_stack.apis.vector_io import Chunk, QueryChunksResponse from llama_stack.providers.inline.vector_io.sqlite_vec.sqlite_vec import ( @@ -34,7 +33,7 @@ def loop(): return asyncio.new_event_loop() -@pytest_asyncio.fixture +@pytest.fixture async def sqlite_vec_index(embedding_dimension, tmp_path_factory): temp_dir = tmp_path_factory.getbasetemp() db_path = str(temp_dir / "test_sqlite.db") @@ -43,14 +42,12 @@ async def sqlite_vec_index(embedding_dimension, tmp_path_factory): await index.delete() -@pytest.mark.asyncio async def test_query_chunk_metadata(sqlite_vec_index, sample_chunks_with_metadata, sample_embeddings_with_metadata): await sqlite_vec_index.add_chunks(sample_chunks_with_metadata, sample_embeddings_with_metadata) response = await sqlite_vec_index.query_vector(sample_embeddings_with_metadata[-1], k=2, score_threshold=0.0) assert response.chunks[0].chunk_metadata == sample_chunks_with_metadata[-1].chunk_metadata -@pytest.mark.asyncio async def test_query_chunks_full_text_search(sqlite_vec_index, sample_chunks, sample_embeddings): await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) query_string = "Sentence 5" @@ -68,7 +65,6 @@ async def test_query_chunks_full_text_search(sqlite_vec_index, sample_chunks, sa assert len(response_no_results.chunks) == 0, f"Expected 0 results, but got {len(response_no_results.chunks)}" -@pytest.mark.asyncio async def test_query_chunks_hybrid(sqlite_vec_index, sample_chunks, sample_embeddings): await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -90,7 +86,6 @@ async def test_query_chunks_hybrid(sqlite_vec_index, sample_chunks, sample_embed assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1)) -@pytest.mark.asyncio async def test_query_chunks_full_text_search_k_greater_than_results(sqlite_vec_index, sample_chunks, sample_embeddings): # Re-initialize with a clean index await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -103,7 +98,6 @@ async def test_query_chunks_full_text_search_k_greater_than_results(sqlite_vec_i assert any("Sentence 1 from document 0" in chunk.content for chunk in response.chunks), "Expected chunk not found" -@pytest.mark.asyncio async def test_chunk_id_conflict(sqlite_vec_index, sample_chunks, embedding_dimension): """Test that chunk IDs do not conflict across batches when inserting chunks.""" # Reduce batch size to force multiple batches for same document @@ -134,7 +128,6 @@ async def sqlite_vec_adapter(sqlite_connection): await adapter.shutdown() -@pytest.mark.asyncio async def test_query_chunks_hybrid_no_keyword_matches(sqlite_vec_index, sample_chunks, sample_embeddings): """Test hybrid search when keyword search returns no matches - should still return vector results.""" await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -163,7 +156,6 @@ async def test_query_chunks_hybrid_no_keyword_matches(sqlite_vec_index, sample_c assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1)) -@pytest.mark.asyncio async def test_query_chunks_hybrid_score_threshold(sqlite_vec_index, sample_chunks, sample_embeddings): """Test hybrid search with a high score threshold.""" await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -185,7 +177,6 @@ async def test_query_chunks_hybrid_score_threshold(sqlite_vec_index, sample_chun assert len(response.chunks) == 0 -@pytest.mark.asyncio async def test_query_chunks_hybrid_different_embedding( sqlite_vec_index, sample_chunks, sample_embeddings, embedding_dimension ): @@ -211,7 +202,6 @@ async def test_query_chunks_hybrid_different_embedding( assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1)) -@pytest.mark.asyncio async def test_query_chunks_hybrid_rrf_ranking(sqlite_vec_index, sample_chunks, sample_embeddings): """Test that RRF properly combines rankings when documents appear in both search methods.""" await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -236,7 +226,6 @@ async def test_query_chunks_hybrid_rrf_ranking(sqlite_vec_index, sample_chunks, assert all(response.scores[i] >= response.scores[i + 1] for i in range(len(response.scores) - 1)) -@pytest.mark.asyncio async def test_query_chunks_hybrid_score_selection(sqlite_vec_index, sample_chunks, sample_embeddings): await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -284,7 +273,6 @@ async def test_query_chunks_hybrid_score_selection(sqlite_vec_index, sample_chun assert response.scores[0] == pytest.approx(2.0 / 61.0, rel=1e-6) # Should behave like RRF -@pytest.mark.asyncio async def test_query_chunks_hybrid_mixed_results(sqlite_vec_index, sample_chunks, sample_embeddings): """Test hybrid search with documents that appear in only one search method.""" await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -313,7 +301,6 @@ async def test_query_chunks_hybrid_mixed_results(sqlite_vec_index, sample_chunks assert "document-2" in doc_ids # From keyword search -@pytest.mark.asyncio async def test_query_chunks_hybrid_weighted_reranker_parametrization( sqlite_vec_index, sample_chunks, sample_embeddings ): @@ -369,7 +356,6 @@ async def test_query_chunks_hybrid_weighted_reranker_parametrization( ) -@pytest.mark.asyncio async def test_query_chunks_hybrid_rrf_impact_factor(sqlite_vec_index, sample_chunks, sample_embeddings): """Test RRFReRanker with different impact factors.""" await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -401,7 +387,6 @@ async def test_query_chunks_hybrid_rrf_impact_factor(sqlite_vec_index, sample_ch assert response.scores[0] == pytest.approx(2.0 / 101.0, rel=1e-6) -@pytest.mark.asyncio async def test_query_chunks_hybrid_edge_cases(sqlite_vec_index, sample_chunks, sample_embeddings): await sqlite_vec_index.add_chunks(sample_chunks, sample_embeddings) @@ -445,7 +430,6 @@ async def test_query_chunks_hybrid_edge_cases(sqlite_vec_index, sample_chunks, s assert len(response.chunks) <= 100 -@pytest.mark.asyncio async def test_query_chunks_hybrid_tie_breaking( sqlite_vec_index, sample_embeddings, embedding_dimension, tmp_path_factory ): diff --git a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py index 0ad98405e..bf7663d2e 100644 --- a/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py +++ b/tests/unit/providers/vector_io/test_vector_io_openai_vector_stores.py @@ -25,12 +25,10 @@ from llama_stack.providers.remote.vector_io.milvus.milvus import VECTOR_DBS_PREF # -v -s --tb=short --disable-warnings --asyncio-mode=auto -@pytest.mark.asyncio async def test_initialize_index(vector_index): await vector_index.initialize() -@pytest.mark.asyncio async def test_add_chunks_query_vector(vector_index, sample_chunks, sample_embeddings): vector_index.delete() vector_index.initialize() @@ -40,7 +38,6 @@ async def test_add_chunks_query_vector(vector_index, sample_chunks, sample_embed vector_index.delete() -@pytest.mark.asyncio async def test_chunk_id_conflict(vector_index, sample_chunks, embedding_dimension): embeddings = np.random.rand(len(sample_chunks), embedding_dimension).astype(np.float32) await vector_index.add_chunks(sample_chunks, embeddings) @@ -54,7 +51,6 @@ async def test_chunk_id_conflict(vector_index, sample_chunks, embedding_dimensio assert len(contents) == len(set(contents)) -@pytest.mark.asyncio async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter): key = f"{VECTOR_DBS_PREFIX}db1" dummy = VectorDB( @@ -65,7 +61,6 @@ async def test_initialize_adapter_with_existing_kvstore(vector_io_adapter): await vector_io_adapter.initialize() -@pytest.mark.asyncio async def test_persistence_across_adapter_restarts(vector_io_adapter): await vector_io_adapter.initialize() dummy = VectorDB( @@ -79,7 +74,6 @@ async def test_persistence_across_adapter_restarts(vector_io_adapter): await vector_io_adapter.shutdown() -@pytest.mark.asyncio async def test_register_and_unregister_vector_db(vector_io_adapter): unique_id = f"foo_db_{np.random.randint(1e6)}" dummy = VectorDB( @@ -92,14 +86,12 @@ async def test_register_and_unregister_vector_db(vector_io_adapter): assert dummy.identifier not in vector_io_adapter.cache -@pytest.mark.asyncio async def test_query_unregistered_raises(vector_io_adapter): fake_emb = np.zeros(8, dtype=np.float32) with pytest.raises(ValueError): await vector_io_adapter.query_chunks("no_such_db", fake_emb) -@pytest.mark.asyncio async def test_insert_chunks_calls_underlying_index(vector_io_adapter): fake_index = AsyncMock() vector_io_adapter.cache["db1"] = fake_index @@ -110,7 +102,6 @@ async def test_insert_chunks_calls_underlying_index(vector_io_adapter): fake_index.insert_chunks.assert_awaited_once_with(chunks) -@pytest.mark.asyncio async def test_insert_chunks_missing_db_raises(vector_io_adapter): vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None) @@ -118,7 +109,6 @@ async def test_insert_chunks_missing_db_raises(vector_io_adapter): await vector_io_adapter.insert_chunks("db_not_exist", []) -@pytest.mark.asyncio async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter): expected = QueryChunksResponse(chunks=[Chunk(content="c1")], scores=[0.1]) fake_index = AsyncMock(query_chunks=AsyncMock(return_value=expected)) @@ -130,7 +120,6 @@ async def test_query_chunks_calls_underlying_index_and_returns(vector_io_adapter assert response is expected -@pytest.mark.asyncio async def test_query_chunks_missing_db_raises(vector_io_adapter): vector_io_adapter._get_and_cache_vector_db_index = AsyncMock(return_value=None) @@ -138,7 +127,6 @@ async def test_query_chunks_missing_db_raises(vector_io_adapter): await vector_io_adapter.query_chunks("db_missing", "q", None) -@pytest.mark.asyncio async def test_save_openai_vector_store(vector_io_adapter): store_id = "vs_1234" openai_vector_store = { @@ -155,7 +143,6 @@ async def test_save_openai_vector_store(vector_io_adapter): assert vector_io_adapter.openai_vector_stores[openai_vector_store["id"]] == openai_vector_store -@pytest.mark.asyncio async def test_update_openai_vector_store(vector_io_adapter): store_id = "vs_1234" openai_vector_store = { @@ -172,7 +159,6 @@ async def test_update_openai_vector_store(vector_io_adapter): assert vector_io_adapter.openai_vector_stores[openai_vector_store["id"]] == openai_vector_store -@pytest.mark.asyncio async def test_delete_openai_vector_store(vector_io_adapter): store_id = "vs_1234" openai_vector_store = { @@ -188,7 +174,6 @@ async def test_delete_openai_vector_store(vector_io_adapter): assert openai_vector_store["id"] not in vector_io_adapter.openai_vector_stores -@pytest.mark.asyncio async def test_load_openai_vector_stores(vector_io_adapter): store_id = "vs_1234" openai_vector_store = { @@ -204,7 +189,6 @@ async def test_load_openai_vector_stores(vector_io_adapter): assert loaded_stores[store_id] == openai_vector_store -@pytest.mark.asyncio async def test_save_openai_vector_store_file(vector_io_adapter, tmp_path_factory): store_id = "vs_1234" file_id = "file_1234" @@ -226,7 +210,6 @@ async def test_save_openai_vector_store_file(vector_io_adapter, tmp_path_factory await vector_io_adapter._save_openai_vector_store_file(store_id, file_id, file_info, file_contents) -@pytest.mark.asyncio async def test_update_openai_vector_store_file(vector_io_adapter, tmp_path_factory): store_id = "vs_1234" file_id = "file_1234" @@ -260,7 +243,6 @@ async def test_update_openai_vector_store_file(vector_io_adapter, tmp_path_facto assert loaded_contents != file_info -@pytest.mark.asyncio async def test_load_openai_vector_store_file_contents(vector_io_adapter, tmp_path_factory): store_id = "vs_1234" file_id = "file_1234" @@ -284,7 +266,6 @@ async def test_load_openai_vector_store_file_contents(vector_io_adapter, tmp_pat assert loaded_contents == file_contents -@pytest.mark.asyncio async def test_delete_openai_vector_store_file_from_storage(vector_io_adapter, tmp_path_factory): store_id = "vs_1234" file_id = "file_1234" diff --git a/tests/unit/rag/test_rag_query.py b/tests/unit/rag/test_rag_query.py index d2dd1783b..b2baa744a 100644 --- a/tests/unit/rag/test_rag_query.py +++ b/tests/unit/rag/test_rag_query.py @@ -17,13 +17,11 @@ from llama_stack.providers.inline.tool_runtime.rag.memory import MemoryToolRunti class TestRagQuery: - @pytest.mark.asyncio async def test_query_raises_on_empty_vector_db_ids(self): rag_tool = MemoryToolRuntimeImpl(config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock()) with pytest.raises(ValueError): await rag_tool.query(content=MagicMock(), vector_db_ids=[]) - @pytest.mark.asyncio async def test_query_chunk_metadata_handling(self): rag_tool = MemoryToolRuntimeImpl(config=MagicMock(), vector_io_api=MagicMock(), inference_api=MagicMock()) content = "test query content" diff --git a/tests/unit/rag/test_vector_store.py b/tests/unit/rag/test_vector_store.py index 9d6b9ee67..919f97ba7 100644 --- a/tests/unit/rag/test_vector_store.py +++ b/tests/unit/rag/test_vector_store.py @@ -112,7 +112,6 @@ class TestValidateEmbedding: class TestVectorStore: - @pytest.mark.asyncio async def test_returns_content_from_pdf_data_uri(self): data_uri = data_url_from_file(DUMMY_PDF_PATH) doc = RAGDocument( @@ -124,7 +123,7 @@ class TestVectorStore: content = await content_from_doc(doc) assert content in DUMMY_PDF_TEXT_CHOICES - @pytest.mark.asyncio + @pytest.mark.allow_network async def test_downloads_pdf_and_returns_content(self): # Using GitHub to host the PDF file url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf" @@ -137,7 +136,7 @@ class TestVectorStore: content = await content_from_doc(doc) assert content in DUMMY_PDF_TEXT_CHOICES - @pytest.mark.asyncio + @pytest.mark.allow_network async def test_downloads_pdf_and_returns_content_with_url_object(self): # Using GitHub to host the PDF file url = "https://raw.githubusercontent.com/meta-llama/llama-stack/da035d69cfca915318eaf485770a467ca3c2a238/llama_stack/providers/tests/memory/fixtures/dummy.pdf" @@ -204,7 +203,6 @@ class TestVectorStore: class TestVectorDBWithIndex: - @pytest.mark.asyncio async def test_insert_chunks_without_embeddings(self): mock_vector_db = MagicMock() mock_vector_db.embedding_model = "test-model without embeddings" @@ -230,7 +228,6 @@ class TestVectorDBWithIndex: assert args[0] == chunks assert np.array_equal(args[1], np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) - @pytest.mark.asyncio async def test_insert_chunks_with_valid_embeddings(self): mock_vector_db = MagicMock() mock_vector_db.embedding_model = "test-model with embeddings" @@ -255,7 +252,6 @@ class TestVectorDBWithIndex: assert args[0] == chunks assert np.array_equal(args[1], np.array([[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]], dtype=np.float32)) - @pytest.mark.asyncio async def test_insert_chunks_with_invalid_embeddings(self): mock_vector_db = MagicMock() mock_vector_db.embedding_dimension = 3 @@ -295,7 +291,6 @@ class TestVectorDBWithIndex: mock_inference_api.embeddings.assert_not_called() mock_index.add_chunks.assert_not_called() - @pytest.mark.asyncio async def test_insert_chunks_with_partially_precomputed_embeddings(self): mock_vector_db = MagicMock() mock_vector_db.embedding_model = "test-model with partial embeddings" diff --git a/tests/unit/registry/test_registry.py b/tests/unit/registry/test_registry.py index 909581bb7..87fe18d54 100644 --- a/tests/unit/registry/test_registry.py +++ b/tests/unit/registry/test_registry.py @@ -38,14 +38,12 @@ def sample_model(): ) -@pytest.mark.asyncio async def test_registry_initialization(disk_dist_registry): # Test empty registry result = await disk_dist_registry.get("nonexistent", "nonexistent") assert result is None -@pytest.mark.asyncio async def test_basic_registration(disk_dist_registry, sample_vector_db, sample_model): print(f"Registering {sample_vector_db}") await disk_dist_registry.register(sample_vector_db) @@ -64,7 +62,6 @@ async def test_basic_registration(disk_dist_registry, sample_vector_db, sample_m assert result_model.provider_id == sample_model.provider_id -@pytest.mark.asyncio async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_db, sample_model): # First populate the disk registry disk_registry = DiskDistributionRegistry(sqlite_kvstore) @@ -85,7 +82,6 @@ async def test_cached_registry_initialization(sqlite_kvstore, sample_vector_db, assert result_vector_db.provider_id == sample_vector_db.provider_id -@pytest.mark.asyncio async def test_cached_registry_updates(cached_disk_dist_registry): new_vector_db = VectorDB( identifier="test_vector_db_2", @@ -112,7 +108,6 @@ async def test_cached_registry_updates(cached_disk_dist_registry): assert result_vector_db.provider_id == new_vector_db.provider_id -@pytest.mark.asyncio async def test_duplicate_provider_registration(cached_disk_dist_registry): original_vector_db = VectorDB( identifier="test_vector_db_2", @@ -137,7 +132,6 @@ async def test_duplicate_provider_registration(cached_disk_dist_registry): assert result.embedding_model == original_vector_db.embedding_model # Original values preserved -@pytest.mark.asyncio async def test_get_all_objects(cached_disk_dist_registry): # Create multiple test banks # Create multiple test banks @@ -170,7 +164,6 @@ async def test_get_all_objects(cached_disk_dist_registry): assert stored_vector_db.embedding_dimension == original_vector_db.embedding_dimension -@pytest.mark.asyncio async def test_parse_registry_values_error_handling(sqlite_kvstore): valid_db = VectorDB( identifier="valid_vector_db", @@ -209,7 +202,6 @@ async def test_parse_registry_values_error_handling(sqlite_kvstore): assert invalid_obj is None -@pytest.mark.asyncio async def test_cached_registry_error_handling(sqlite_kvstore): valid_db = VectorDB( identifier="valid_cached_db", diff --git a/tests/unit/registry/test_registry_acl.py b/tests/unit/registry/test_registry_acl.py index 48b3ac51b..6cfb20944 100644 --- a/tests/unit/registry/test_registry_acl.py +++ b/tests/unit/registry/test_registry_acl.py @@ -5,14 +5,11 @@ # the root directory of this source tree. -import pytest - from llama_stack.apis.models import ModelType from llama_stack.distribution.datatypes import ModelWithOwner, User from llama_stack.distribution.store.registry import CachedDiskDistributionRegistry -@pytest.mark.asyncio async def test_registry_cache_with_acl(cached_disk_dist_registry): model = ModelWithOwner( identifier="model-acl", @@ -48,7 +45,6 @@ async def test_registry_cache_with_acl(cached_disk_dist_registry): assert new_model.owner.attributes["teams"] == ["ai-team"] -@pytest.mark.asyncio async def test_registry_empty_acl(cached_disk_dist_registry): model = ModelWithOwner( identifier="model-empty-acl", @@ -85,7 +81,6 @@ async def test_registry_empty_acl(cached_disk_dist_registry): assert len(all_models) == 2 -@pytest.mark.asyncio async def test_registry_serialization(cached_disk_dist_registry): attributes = { "roles": ["admin", "researcher"], diff --git a/tests/unit/server/test_access_control.py b/tests/unit/server/test_access_control.py index af03ddacb..fb9c6f95e 100644 --- a/tests/unit/server/test_access_control.py +++ b/tests/unit/server/test_access_control.py @@ -7,7 +7,6 @@ from unittest.mock import MagicMock, Mock, patch import pytest -import pytest_asyncio import yaml from pydantic import TypeAdapter, ValidationError @@ -27,7 +26,7 @@ def _return_model(model): return model -@pytest_asyncio.fixture +@pytest.fixture async def test_setup(cached_disk_dist_registry): mock_inference = Mock() mock_inference.__provider_spec__ = MagicMock() @@ -41,7 +40,6 @@ async def test_setup(cached_disk_dist_registry): yield cached_disk_dist_registry, routing_table -@pytest.mark.asyncio @patch("llama_stack.distribution.routing_tables.common.get_authenticated_user") async def test_access_control_with_cache(mock_get_authenticated_user, test_setup): registry, routing_table = test_setup @@ -106,7 +104,6 @@ async def test_access_control_with_cache(mock_get_authenticated_user, test_setup await routing_table.get_model("model-admin") -@pytest.mark.asyncio @patch("llama_stack.distribution.routing_tables.common.get_authenticated_user") async def test_access_control_and_updates(mock_get_authenticated_user, test_setup): registry, routing_table = test_setup @@ -145,7 +142,6 @@ async def test_access_control_and_updates(mock_get_authenticated_user, test_setu assert model.identifier == "model-updates" -@pytest.mark.asyncio @patch("llama_stack.distribution.routing_tables.common.get_authenticated_user") async def test_access_control_empty_attributes(mock_get_authenticated_user, test_setup): registry, routing_table = test_setup @@ -170,7 +166,6 @@ async def test_access_control_empty_attributes(mock_get_authenticated_user, test assert "model-empty-attrs" in model_ids -@pytest.mark.asyncio @patch("llama_stack.distribution.routing_tables.common.get_authenticated_user") async def test_no_user_attributes(mock_get_authenticated_user, test_setup): registry, routing_table = test_setup @@ -201,7 +196,6 @@ async def test_no_user_attributes(mock_get_authenticated_user, test_setup): assert all_models.data[0].identifier == "model-public-2" -@pytest.mark.asyncio @patch("llama_stack.distribution.routing_tables.common.get_authenticated_user") async def test_automatic_access_attributes(mock_get_authenticated_user, test_setup): """Test that newly created resources inherit access attributes from their creator.""" @@ -246,7 +240,7 @@ async def test_automatic_access_attributes(mock_get_authenticated_user, test_set assert model.identifier == "auto-access-model" -@pytest_asyncio.fixture +@pytest.fixture async def test_setup_with_access_policy(cached_disk_dist_registry): mock_inference = Mock() mock_inference.__provider_spec__ = MagicMock() @@ -281,7 +275,6 @@ async def test_setup_with_access_policy(cached_disk_dist_registry): yield routing_table -@pytest.mark.asyncio @patch("llama_stack.distribution.routing_tables.common.get_authenticated_user") async def test_access_policy(mock_get_authenticated_user, test_setup_with_access_policy): routing_table = test_setup_with_access_policy diff --git a/tests/unit/server/test_auth.py b/tests/unit/server/test_auth.py index 39d6af1c8..7012a7f17 100644 --- a/tests/unit/server/test_auth.py +++ b/tests/unit/server/test_auth.py @@ -202,7 +202,6 @@ def test_http_auth_request_payload(http_client, valid_api_key, mock_auth_endpoin assert "param2" in payload["request"]["params"] -@pytest.mark.asyncio async def test_http_middleware_with_access_attributes(mock_http_middleware, mock_scope): """Test HTTP middleware behavior with access attributes""" middleware, mock_app = mock_http_middleware diff --git a/tests/unit/server/test_resolver.py b/tests/unit/server/test_resolver.py index acf4da0a3..a348590b1 100644 --- a/tests/unit/server/test_resolver.py +++ b/tests/unit/server/test_resolver.py @@ -9,7 +9,6 @@ import sys from typing import Any, Protocol from unittest.mock import AsyncMock, MagicMock -import pytest from pydantic import BaseModel, Field from llama_stack.apis.inference import Inference @@ -66,7 +65,6 @@ class SampleImpl: pass -@pytest.mark.asyncio async def test_resolve_impls_basic(): # Create a real provider spec provider_spec = InlineProviderSpec( diff --git a/tests/unit/server/test_sse.py b/tests/unit/server/test_sse.py index 60e9f4609..d42857186 100644 --- a/tests/unit/server/test_sse.py +++ b/tests/unit/server/test_sse.py @@ -7,13 +7,10 @@ import asyncio from unittest.mock import AsyncMock, MagicMock -import pytest - from llama_stack.apis.common.responses import PaginatedResponse from llama_stack.distribution.server.server import create_dynamic_typed_route, create_sse_event, sse_generator -@pytest.mark.asyncio async def test_sse_generator_basic(): # An AsyncIterator wrapped in an Awaitable, just like our web methods async def async_event_gen(): @@ -35,7 +32,6 @@ async def test_sse_generator_basic(): assert seen_events[1] == create_sse_event("Test event 2") -@pytest.mark.asyncio async def test_sse_generator_client_disconnected(): # An AsyncIterator wrapped in an Awaitable, just like our web methods async def async_event_gen(): @@ -58,7 +54,6 @@ async def test_sse_generator_client_disconnected(): assert seen_events[0] == create_sse_event("Test event 1") -@pytest.mark.asyncio async def test_sse_generator_client_disconnected_before_response_starts(): # Disconnect before the response starts async def async_event_gen(): @@ -75,7 +70,6 @@ async def test_sse_generator_client_disconnected_before_response_starts(): assert len(seen_events) == 0 -@pytest.mark.asyncio async def test_sse_generator_error_before_response_starts(): # Raise an error before the response starts async def async_event_gen(): @@ -93,7 +87,6 @@ async def test_sse_generator_error_before_response_starts(): assert 'data: {"error":' in seen_events[0] -@pytest.mark.asyncio async def test_paginated_response_url_setting(): """Test that PaginatedResponse gets url set to route path.""" diff --git a/tests/unit/utils/inference/test_inference_store.py b/tests/unit/utils/inference/test_inference_store.py index de619c760..730f54a05 100644 --- a/tests/unit/utils/inference/test_inference_store.py +++ b/tests/unit/utils/inference/test_inference_store.py @@ -42,7 +42,6 @@ def create_test_chat_completion( ) -@pytest.mark.asyncio async def test_inference_store_pagination_basic(): """Test basic pagination functionality.""" with TemporaryDirectory() as tmp_dir: @@ -88,7 +87,6 @@ async def test_inference_store_pagination_basic(): assert result3.has_more is False -@pytest.mark.asyncio async def test_inference_store_pagination_ascending(): """Test pagination with ascending order.""" with TemporaryDirectory() as tmp_dir: @@ -123,7 +121,6 @@ async def test_inference_store_pagination_ascending(): assert result2.has_more is True -@pytest.mark.asyncio async def test_inference_store_pagination_with_model_filter(): """Test pagination combined with model filtering.""" with TemporaryDirectory() as tmp_dir: @@ -161,7 +158,6 @@ async def test_inference_store_pagination_with_model_filter(): assert result2.has_more is False -@pytest.mark.asyncio async def test_inference_store_pagination_invalid_after(): """Test error handling for invalid 'after' parameter.""" with TemporaryDirectory() as tmp_dir: @@ -174,7 +170,6 @@ async def test_inference_store_pagination_invalid_after(): await store.list_chat_completions(after="non-existent", limit=2) -@pytest.mark.asyncio async def test_inference_store_pagination_no_limit(): """Test pagination behavior when no limit is specified.""" with TemporaryDirectory() as tmp_dir: diff --git a/tests/unit/utils/responses/test_responses_store.py b/tests/unit/utils/responses/test_responses_store.py index 3f25e2524..44d4b30da 100644 --- a/tests/unit/utils/responses/test_responses_store.py +++ b/tests/unit/utils/responses/test_responses_store.py @@ -44,7 +44,6 @@ def create_test_response_input(content: str, input_id: str) -> OpenAIResponseInp ) -@pytest.mark.asyncio async def test_responses_store_pagination_basic(): """Test basic pagination functionality for responses store.""" with TemporaryDirectory() as tmp_dir: @@ -90,7 +89,6 @@ async def test_responses_store_pagination_basic(): assert result3.has_more is False -@pytest.mark.asyncio async def test_responses_store_pagination_ascending(): """Test pagination with ascending order.""" with TemporaryDirectory() as tmp_dir: @@ -125,7 +123,6 @@ async def test_responses_store_pagination_ascending(): assert result2.has_more is True -@pytest.mark.asyncio async def test_responses_store_pagination_with_model_filter(): """Test pagination combined with model filtering.""" with TemporaryDirectory() as tmp_dir: @@ -163,7 +160,6 @@ async def test_responses_store_pagination_with_model_filter(): assert result2.has_more is False -@pytest.mark.asyncio async def test_responses_store_pagination_invalid_after(): """Test error handling for invalid 'after' parameter.""" with TemporaryDirectory() as tmp_dir: @@ -176,7 +172,6 @@ async def test_responses_store_pagination_invalid_after(): await store.list_responses(after="non-existent", limit=2) -@pytest.mark.asyncio async def test_responses_store_pagination_no_limit(): """Test pagination behavior when no limit is specified.""" with TemporaryDirectory() as tmp_dir: @@ -205,7 +200,6 @@ async def test_responses_store_pagination_no_limit(): assert result.has_more is False -@pytest.mark.asyncio async def test_responses_store_get_response_object(): """Test retrieving a single response object.""" with TemporaryDirectory() as tmp_dir: @@ -230,7 +224,6 @@ async def test_responses_store_get_response_object(): await store.get_response_object("non-existent") -@pytest.mark.asyncio async def test_responses_store_input_items_pagination(): """Test pagination functionality for input items.""" with TemporaryDirectory() as tmp_dir: @@ -308,7 +301,6 @@ async def test_responses_store_input_items_pagination(): await store.list_response_input_items("test-resp", before="some-id", after="other-id") -@pytest.mark.asyncio async def test_responses_store_input_items_before_pagination(): """Test before pagination functionality for input items.""" with TemporaryDirectory() as tmp_dir: diff --git a/tests/unit/utils/sqlstore/test_sqlstore.py b/tests/unit/utils/sqlstore/test_sqlstore.py index c4230a396..778f0b658 100644 --- a/tests/unit/utils/sqlstore/test_sqlstore.py +++ b/tests/unit/utils/sqlstore/test_sqlstore.py @@ -14,7 +14,6 @@ from llama_stack.providers.utils.sqlstore.sqlalchemy_sqlstore import SqlAlchemyS from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig -@pytest.mark.asyncio async def test_sqlite_sqlstore(): with TemporaryDirectory() as tmp_dir: db_name = "test.db" @@ -66,7 +65,6 @@ async def test_sqlite_sqlstore(): assert result.has_more is False -@pytest.mark.asyncio async def test_sqlstore_pagination_basic(): """Test basic pagination functionality at the SQL store level.""" with TemporaryDirectory() as tmp_dir: @@ -131,7 +129,6 @@ async def test_sqlstore_pagination_basic(): assert result3.has_more is False -@pytest.mark.asyncio async def test_sqlstore_pagination_with_filter(): """Test pagination with WHERE conditions.""" with TemporaryDirectory() as tmp_dir: @@ -184,7 +181,6 @@ async def test_sqlstore_pagination_with_filter(): assert result2.has_more is False -@pytest.mark.asyncio async def test_sqlstore_pagination_ascending_order(): """Test pagination with ascending order.""" with TemporaryDirectory() as tmp_dir: @@ -233,7 +229,6 @@ async def test_sqlstore_pagination_ascending_order(): assert result2.has_more is True -@pytest.mark.asyncio async def test_sqlstore_pagination_multi_column_ordering_error(): """Test that multi-column ordering raises an error when using cursor pagination.""" with TemporaryDirectory() as tmp_dir: @@ -271,7 +266,6 @@ async def test_sqlstore_pagination_multi_column_ordering_error(): assert result.data[0]["id"] == "task1" -@pytest.mark.asyncio async def test_sqlstore_pagination_cursor_requires_order_by(): """Test that cursor pagination requires order_by parameter.""" with TemporaryDirectory() as tmp_dir: @@ -289,7 +283,6 @@ async def test_sqlstore_pagination_cursor_requires_order_by(): ) -@pytest.mark.asyncio async def test_sqlstore_pagination_error_handling(): """Test error handling for invalid columns and cursor IDs.""" with TemporaryDirectory() as tmp_dir: @@ -339,7 +332,6 @@ async def test_sqlstore_pagination_error_handling(): ) -@pytest.mark.asyncio async def test_sqlstore_pagination_custom_key_column(): """Test pagination with custom primary key column (not 'id').""" with TemporaryDirectory() as tmp_dir: diff --git a/tests/unit/utils/test_authorized_sqlstore.py b/tests/unit/utils/test_authorized_sqlstore.py index 61763719a..066f67a98 100644 --- a/tests/unit/utils/test_authorized_sqlstore.py +++ b/tests/unit/utils/test_authorized_sqlstore.py @@ -7,8 +7,6 @@ from tempfile import TemporaryDirectory from unittest.mock import patch -import pytest - from llama_stack.distribution.access_control.access_control import default_policy, is_action_allowed from llama_stack.distribution.access_control.datatypes import Action from llama_stack.distribution.datatypes import User @@ -18,7 +16,6 @@ from llama_stack.providers.utils.sqlstore.sqlalchemy_sqlstore import SqlAlchemyS from llama_stack.providers.utils.sqlstore.sqlstore import SqliteSqlStoreConfig -@pytest.mark.asyncio @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") async def test_authorized_fetch_with_where_sql_access_control(mock_get_authenticated_user): """Test that fetch_all works correctly with where_sql for access control""" @@ -81,7 +78,6 @@ async def test_authorized_fetch_with_where_sql_access_control(mock_get_authentic assert row["title"] == "User Document" -@pytest.mark.asyncio @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") async def test_sql_policy_consistency(mock_get_authenticated_user): """Test that SQL WHERE clause logic exactly matches is_action_allowed policy logic""" @@ -168,7 +164,6 @@ async def test_sql_policy_consistency(mock_get_authenticated_user): ) -@pytest.mark.asyncio @patch("llama_stack.providers.utils.sqlstore.authorized_sqlstore.get_authenticated_user") async def test_authorized_store_user_attribute_capture(mock_get_authenticated_user): """Test that user attributes are properly captured during insert""" diff --git a/uv.lock b/uv.lock index e77fb89f5..83e502e7f 100644 --- a/uv.lock +++ b/uv.lock @@ -615,6 +615,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/a3/460c57f094a4a165c84a1341c373b0a4f5ec6ac244b998d5021aade89b77/ecdsa-0.19.1-py2.py3-none-any.whl", hash = "sha256:30638e27cf77b7e15c4c4cc1973720149e1033827cfd00661ca5c8cc0cdb24c3", size = 150607, upload-time = "2025-03-13T11:52:41.757Z" }, ] +[[package]] +name = "eval-type-backport" +version = "0.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/30/ea/8b0ac4469d4c347c6a385ff09dc3c048c2d021696664e26c7ee6791631b5/eval_type_backport-0.2.2.tar.gz", hash = "sha256:f0576b4cf01ebb5bd358d02314d31846af5e07678387486e2c798af0e7d849c1", size = 9079, upload-time = "2024-12-21T20:09:46.005Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ce/31/55cd413eaccd39125368be33c46de24a1f639f2e12349b0361b4678f3915/eval_type_backport-0.2.2-py3-none-any.whl", hash = "sha256:cb6ad7c393517f476f96d456d0412ea80f0a8cf96f6892834cd9340149111b0a", size = 5830, upload-time = "2024-12-21T20:09:44.175Z" }, +] + [[package]] name = "executing" version = "2.2.0" @@ -1238,6 +1247,28 @@ version = "1.4" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/65/c6/246100fa3967074d9725b3716913bd495823547bde5047050d4c3462f994/linkify-1.4.tar.gz", hash = "sha256:9ba276ba179525f7262820d90f009604e51cd4f1466c1112b882ef7eda243d5e", size = 1749, upload-time = "2009-11-12T21:42:00.934Z" } +[[package]] +name = "litellm" +version = "1.74.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "click" }, + { name = "httpx" }, + { name = "importlib-metadata" }, + { name = "jinja2" }, + { name = "jsonschema" }, + { name = "openai" }, + { name = "pydantic" }, + { name = "python-dotenv" }, + { name = "tiktoken" }, + { name = "tokenizers" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/38/10/63cdae1b1d581ad1db51153dfd06c4e18394a3ba8de495f73f2d797ece3b/litellm-1.74.2.tar.gz", hash = "sha256:cbacffe93976c60ca674fec0a942c70b900b4ad1c8069395174049a162f255bf", size = 9230641, upload-time = "2025-07-11T03:31:07.925Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/f7/67689245f48b9e79bcd2f3a10a3690cb1918fb99fffd5a623ed2496bca66/litellm-1.74.2-py3-none-any.whl", hash = "sha256:29bb555b45128e4cc696e72921a6ec24e97b14e9b69e86eed6f155124ad629b1", size = 8587065, upload-time = "2025-07-11T03:31:05.598Z" }, +] + [[package]] name = "llama-stack" version = "0.2.14" @@ -1293,6 +1324,7 @@ dev = [ { name = "pytest-cov" }, { name = "pytest-html" }, { name = "pytest-json-report" }, + { name = "pytest-socket" }, { name = "pytest-timeout" }, { name = "ruamel-yaml" }, { name = "ruff" }, @@ -1341,6 +1373,7 @@ unit = [ { name = "blobfile" }, { name = "chardet" }, { name = "faiss-cpu" }, + { name = "litellm" }, { name = "mcp" }, { name = "openai" }, { name = "pymilvus" }, @@ -1348,6 +1381,7 @@ unit = [ { name = "qdrant-client" }, { name = "sqlalchemy", extra = ["asyncio"] }, { name = "sqlite-vec" }, + { name = "together" }, ] [package.metadata] @@ -1365,8 +1399,8 @@ requires-dist = [ { name = "llama-stack-client", specifier = ">=0.2.14" }, { name = "llama-stack-client", marker = "extra == 'ui'", specifier = ">=0.2.14" }, { name = "openai", specifier = ">=1.66" }, - { name = "opentelemetry-exporter-otlp-proto-http" }, - { name = "opentelemetry-sdk" }, + { name = "opentelemetry-exporter-otlp-proto-http", specifier = ">=1.30.0" }, + { name = "opentelemetry-sdk", specifier = ">=1.30.0" }, { name = "pandas", marker = "extra == 'ui'" }, { name = "pillow" }, { name = "prompt-toolkit" }, @@ -1394,11 +1428,12 @@ dev = [ { name = "black" }, { name = "nbval" }, { name = "pre-commit" }, - { name = "pytest" }, - { name = "pytest-asyncio" }, + { name = "pytest", specifier = ">=8.4" }, + { name = "pytest-asyncio", specifier = ">=1.0" }, { name = "pytest-cov" }, { name = "pytest-html" }, { name = "pytest-json-report" }, + { name = "pytest-socket" }, { name = "pytest-timeout" }, { name = "ruamel-yaml" }, { name = "ruff" }, @@ -1446,6 +1481,7 @@ unit = [ { name = "blobfile" }, { name = "chardet" }, { name = "faiss-cpu" }, + { name = "litellm" }, { name = "mcp" }, { name = "openai" }, { name = "pymilvus", specifier = ">=2.5.12" }, @@ -1454,6 +1490,7 @@ unit = [ { name = "sqlalchemy" }, { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0.41" }, { name = "sqlite-vec" }, + { name = "together" }, ] [[package]] @@ -2432,29 +2469,30 @@ wheels = [ [[package]] name = "pytest" -version = "8.3.4" +version = "8.4.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, { name = "iniconfig" }, { name = "packaging" }, { name = "pluggy" }, + { name = "pygments" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/05/35/30e0d83068951d90a01852cb1cef56e5d8a09d20c7f511634cc2f7e0372a/pytest-8.3.4.tar.gz", hash = "sha256:965370d062bce11e73868e0335abac31b4d3de0e82f4007408d242b4f8610761", size = 1445919, upload-time = "2024-12-01T12:54:25.98Z" } +sdist = { url = "https://files.pythonhosted.org/packages/08/ba/45911d754e8eba3d5a841a5ce61a65a685ff1798421ac054f85aa8747dfb/pytest-8.4.1.tar.gz", hash = "sha256:7c67fd69174877359ed9371ec3af8a3d2b04741818c51e5e99cc1742251fa93c", size = 1517714, upload-time = "2025-06-18T05:48:06.109Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/11/92/76a1c94d3afee238333bc0a42b82935dd8f9cf8ce9e336ff87ee14d9e1cf/pytest-8.3.4-py3-none-any.whl", hash = "sha256:50e16d954148559c9a74109af1eaf0c945ba2d8f30f0a3d3335edde19788b6f6", size = 343083, upload-time = "2024-12-01T12:54:19.735Z" }, + { url = "https://files.pythonhosted.org/packages/29/16/c8a903f4c4dffe7a12843191437d7cd8e32751d5de349d45d3fe69544e87/pytest-8.4.1-py3-none-any.whl", hash = "sha256:539c70ba6fcead8e78eebbf1115e8b589e7565830d7d006a8723f19ac8a0afb7", size = 365474, upload-time = "2025-06-18T05:48:03.955Z" }, ] [[package]] name = "pytest-asyncio" -version = "0.25.3" +version = "1.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "pytest" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/f2/a8/ecbc8ede70921dd2f544ab1cadd3ff3bf842af27f87bbdea774c7baa1d38/pytest_asyncio-0.25.3.tar.gz", hash = "sha256:fc1da2cf9f125ada7e710b4ddad05518d4cee187ae9412e9ac9271003497f07a", size = 54239, upload-time = "2025-01-28T18:37:58.729Z" } +sdist = { url = "https://files.pythonhosted.org/packages/d0/d4/14f53324cb1a6381bef29d698987625d80052bb33932d8e7cbf9b337b17c/pytest_asyncio-1.0.0.tar.gz", hash = "sha256:d15463d13f4456e1ead2594520216b225a16f781e144f8fdf6c5bb4667c48b3f", size = 46960, upload-time = "2025-05-26T04:54:40.484Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/67/17/3493c5624e48fd97156ebaec380dcaafee9506d7e2c46218ceebbb57d7de/pytest_asyncio-0.25.3-py3-none-any.whl", hash = "sha256:9e89518e0f9bd08928f97a3482fdc4e244df17529460bc038291ccaf8f85c7c3", size = 19467, upload-time = "2025-01-28T18:37:56.798Z" }, + { url = "https://files.pythonhosted.org/packages/30/05/ce271016e351fddc8399e546f6e23761967ee09c8c568bbfbecb0c150171/pytest_asyncio-1.0.0-py3-none-any.whl", hash = "sha256:4f024da9f1ef945e680dc68610b52550e36590a67fd31bb3b4943979a1f90ef3", size = 15976, upload-time = "2025-05-26T04:54:39.035Z" }, ] [[package]] @@ -2509,6 +2547,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3e/43/7e7b2ec865caa92f67b8f0e9231a798d102724ca4c0e1f414316be1c1ef2/pytest_metadata-3.1.1-py3-none-any.whl", hash = "sha256:c8e0844db684ee1c798cfa38908d20d67d0463ecb6137c72e91f418558dd5f4b", size = 11428, upload-time = "2024-02-12T19:38:42.531Z" }, ] +[[package]] +name = "pytest-socket" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/ff/90c7e1e746baf3d62ce864c479fd53410b534818b9437413903596f81580/pytest_socket-0.7.0.tar.gz", hash = "sha256:71ab048cbbcb085c15a4423b73b619a8b35d6a307f46f78ea46be51b1b7e11b3", size = 12389, upload-time = "2024-01-28T20:17:23.177Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/19/58/5d14cb5cb59409e491ebe816c47bf81423cd03098ea92281336320ae5681/pytest_socket-0.7.0-py3-none-any.whl", hash = "sha256:7e0f4642177d55d317bbd58fc68c6bd9048d6eadb2d46a89307fa9221336ce45", size = 6754, upload-time = "2024-01-28T20:17:22.105Z" }, +] + [[package]] name = "pytest-timeout" version = "2.4.0" @@ -2951,6 +3001,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/58/29/93c53c098d301132196c3238c312825324740851d77a8500a2462c0fd888/setuptools-80.8.0-py3-none-any.whl", hash = "sha256:95a60484590d24103af13b686121328cc2736bee85de8936383111e421b9edc0", size = 1201470, upload-time = "2025-05-20T14:02:51.348Z" }, ] +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, +] + [[package]] name = "six" version = "1.17.0" @@ -3383,6 +3442,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b2/fe/81695a1aa331a842b582453b605175f419fe8540355886031328089d840a/sympy-1.13.1-py3-none-any.whl", hash = "sha256:db36cdc64bf61b9b24578b6f7bab1ecdd2452cf008f34faa33776680c26d66f8", size = 6189177, upload-time = "2024-07-19T09:26:48.863Z" }, ] +[[package]] +name = "tabulate" +version = "0.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ec/fe/802052aecb21e3797b8f7902564ab6ea0d60ff8ca23952079064155d1ae1/tabulate-0.9.0.tar.gz", hash = "sha256:0095b12bf5966de529c0feb1fa08671671b3368eec77d7ef7ab114be2c068b3c", size = 81090, upload-time = "2022-10-06T17:21:48.54Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252, upload-time = "2022-10-06T17:21:44.262Z" }, +] + [[package]] name = "tenacity" version = "9.1.2" @@ -3425,6 +3493,29 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/de/a8/8f499c179ec900783ffe133e9aab10044481679bb9aad78436d239eee716/tiktoken-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:5ea0edb6f83dc56d794723286215918c1cde03712cbbafa0348b33448faf5b95", size = 894669, upload-time = "2025-02-14T06:02:47.341Z" }, ] +[[package]] +name = "together" +version = "1.5.21" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "aiohttp" }, + { name = "click" }, + { name = "eval-type-backport" }, + { name = "filelock" }, + { name = "numpy" }, + { name = "pillow" }, + { name = "pydantic" }, + { name = "requests" }, + { name = "rich" }, + { name = "tabulate" }, + { name = "tqdm" }, + { name = "typer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ea/53/e33c5e6d53c2e2bbd07f9dcb1979e27ac670fca0e4e238b169aa4c358ee2/together-1.5.21.tar.gz", hash = "sha256:59adb8cf4c5b77eca76b8c66a73c47c45fd828aaf4f059f33f893f8c5f68f85a", size = 69887, upload-time = "2025-07-10T21:04:43.781Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/31/6556a303ea39929fa016f4260eef289b620cf366a576c304507cb75b4d12/together-1.5.21-py3-none-any.whl", hash = "sha256:35e6c0072033a2e5f1105de8781e969f41cffc85dae508b6f4dc293360026872", size = 96141, upload-time = "2025-07-10T21:04:42.418Z" }, +] + [[package]] name = "tokenizers" version = "0.21.1" @@ -3643,6 +3734,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/aa/22/733a6fc4a6445d835242f64c490fdd30f4a08d58f2b788613de3f9170692/transformers-4.50.3-py3-none-any.whl", hash = "sha256:6111610a43dec24ef32c3df0632c6b25b07d9711c01d9e1077bdd2ff6b14a38c", size = 10180411, upload-time = "2025-03-28T18:20:59.265Z" }, ] +[[package]] +name = "typer" +version = "0.15.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/6c/89/c527e6c848739be8ceb5c44eb8208c52ea3515c6cf6406aa61932887bf58/typer-0.15.4.tar.gz", hash = "sha256:89507b104f9b6a0730354f27c39fae5b63ccd0c95b1ce1f1a6ba0cfd329997c3", size = 101559, upload-time = "2025-05-14T16:34:57.704Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c9/62/d4ba7afe2096d5659ec3db8b15d8665bdcb92a3c6ff0b95e99895b335a9c/typer-0.15.4-py3-none-any.whl", hash = "sha256:eb0651654dcdea706780c466cf06d8f174405a659ffff8f163cfbfee98c0e173", size = 45258, upload-time = "2025-05-14T16:34:55.583Z" }, +] + [[package]] name = "types-requests" version = "2.32.0.20241016"