Merge branch 'main' into agent_session_unit_test

Francisco Arceo · 2025-08-12 10:51:00 -06:00 · committed by GitHub
commit a4180894ec
21 changed files with 308 additions and 182 deletions

View file

@@ -62,3 +62,13 @@ class SessionNotFoundError(ValueError):
def __init__(self, session_name: str) -> None:
message = f"Session '{session_name}' not found or access denied."
super().__init__(message)
class ModelTypeError(TypeError):
"""raised when a model is present but not the correct type"""
def __init__(self, model_name: str, model_type: str, expected_model_type: str) -> None:
message = (
f"Model '{model_name}' is of type '{model_type}' rather than the expected type '{expected_model_type}'"
)
super().__init__(message)
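
A minimal usage sketch (not part of the diff) of the new exception; the model name below is hypothetical, and the import path matches the hunk further down that exposes ModelTypeError from llama_stack.apis.common.errors:

from llama_stack.apis.common.errors import ModelTypeError

try:
    # arguments are (model name, actual type, expected type), all strings; the name is made up
    raise ModelTypeError("my-embed-model", "embedding", "llm")
except ModelTypeError as err:
    # prints: Model 'my-embed-model' is of type 'embedding' rather than the expected type 'llm'
    print(err)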

View file

@@ -18,7 +18,7 @@ from llama_stack.apis.common.content_types import (
InterleavedContent,
InterleavedContentItem,
)
from llama_stack.apis.common.errors import ModelNotFoundError
from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
from llama_stack.apis.inference import (
BatchChatCompletionResponse,
BatchCompletionResponse,
@@ -177,6 +177,15 @@ class InferenceRouter(Inference):
encoded = self.formatter.encode_content(messages)
return len(encoded.tokens) if encoded and encoded.tokens else 0
async def _get_model(self, model_id: str, expected_model_type: str) -> Model:
"""takes a model id and gets model after ensuring that it is accessible and of the correct type"""
model = await self.routing_table.get_model(model_id)
if model is None:
raise ModelNotFoundError(model_id)
if model.model_type != expected_model_type:
raise ModelTypeError(model_id, model.model_type, expected_model_type)
return model
async def chat_completion(
self,
model_id: str,
@@ -195,11 +204,7 @@
)
if sampling_params is None:
sampling_params = SamplingParams()
model = await self.routing_table.get_model(model_id)
if model is None:
raise ModelNotFoundError(model_id)
if model.model_type == ModelType.embedding:
raise ValueError(f"Model '{model_id}' is an embedding model and does not support chat completions")
model = await self._get_model(model_id, ModelType.llm)
if tool_config:
if tool_choice and tool_choice != tool_config.tool_choice:
raise ValueError("tool_choice and tool_config.tool_choice must match")
@@ -301,11 +306,7 @@
logger.debug(
f"InferenceRouter.completion: {model_id=}, {stream=}, {content=}, {sampling_params=}, {response_format=}",
)
model = await self.routing_table.get_model(model_id)
if model is None:
raise ModelNotFoundError(model_id)
if model.model_type == ModelType.embedding:
raise ValueError(f"Model '{model_id}' is an embedding model and does not support chat completions")
model = await self._get_model(model_id, ModelType.llm)
provider = await self.routing_table.get_provider_impl(model_id)
params = dict(
model_id=model_id,
@@ -355,11 +356,7 @@
task_type: EmbeddingTaskType | None = None,
) -> EmbeddingsResponse:
logger.debug(f"InferenceRouter.embeddings: {model_id}")
model = await self.routing_table.get_model(model_id)
if model is None:
raise ModelNotFoundError(model_id)
if model.model_type == ModelType.llm:
raise ValueError(f"Model '{model_id}' is an LLM model and does not support embeddings")
await self._get_model(model_id, ModelType.embedding)
provider = await self.routing_table.get_provider_impl(model_id)
return await provider.embeddings(
model_id=model_id,
@@ -395,12 +392,7 @@
logger.debug(
f"InferenceRouter.openai_completion: {model=}, {stream=}, {prompt=}",
)
model_obj = await self.routing_table.get_model(model)
if model_obj is None:
raise ModelNotFoundError(model)
if model_obj.model_type == ModelType.embedding:
raise ValueError(f"Model '{model}' is an embedding model and does not support completions")
model_obj = await self._get_model(model, ModelType.llm)
params = dict(
model=model_obj.identifier,
prompt=prompt,
@@ -476,11 +468,7 @@
logger.debug(
f"InferenceRouter.openai_chat_completion: {model=}, {stream=}, {messages=}",
)
model_obj = await self.routing_table.get_model(model)
if model_obj is None:
raise ModelNotFoundError(model)
if model_obj.model_type == ModelType.embedding:
raise ValueError(f"Model '{model}' is an embedding model and does not support chat completions")
model_obj = await self._get_model(model, ModelType.llm)
# Use the OpenAI client for a bit of extra input validation without
# exposing the OpenAI client itself as part of our API surface
@@ -567,12 +555,7 @@
logger.debug(
f"InferenceRouter.openai_embeddings: {model=}, input_type={type(input)}, {encoding_format=}, {dimensions=}",
)
model_obj = await self.routing_table.get_model(model)
if model_obj is None:
raise ModelNotFoundError(model)
if model_obj.model_type != ModelType.embedding:
raise ValueError(f"Model '{model}' is not an embedding model")
model_obj = await self._get_model(model, ModelType.embedding)
params = dict(
model=model_obj.identifier,
input=input,
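
Since this merge targets a unit-test branch, a hedged pytest-style sketch of how the _get_model helper added in this file could be exercised; the InferenceRouter import path is an assumption, the constructor is bypassed for brevity, and pytest-asyncio is assumed to be installed:

import pytest
from types import SimpleNamespace
from unittest.mock import AsyncMock

from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
from llama_stack.apis.models import ModelType


@pytest.mark.asyncio
async def test_get_model_checks_presence_and_type():
    from llama_stack.core.routers.inference import InferenceRouter  # module path assumed

    router = InferenceRouter.__new__(InferenceRouter)  # skip __init__ for this sketch
    router.routing_table = AsyncMock()

    # unknown model id -> ModelNotFoundError
    router.routing_table.get_model.return_value = None
    with pytest.raises(ModelNotFoundError):
        await router._get_model("missing-model", ModelType.llm)

    # model exists but has the wrong type -> ModelTypeError
    router.routing_table.get_model.return_value = SimpleNamespace(model_type=ModelType.embedding)
    with pytest.raises(ModelTypeError):
        await router._get_model("my-embed-model", ModelType.llm)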

View file

@@ -124,10 +124,7 @@ class ToolGroupsRoutingTable(CommonRoutingTableImpl, ToolGroups):
return toolgroup
async def unregister_toolgroup(self, toolgroup_id: str) -> None:
tool_group = await self.get_tool_group(toolgroup_id)
if tool_group is None:
raise ToolGroupNotFoundError(toolgroup_id)
await self.unregister_object(tool_group)
await self.unregister_object(await self.get_tool_group(toolgroup_id))
async def shutdown(self) -> None:
pass

View file

@@ -8,7 +8,7 @@ from typing import Any
from pydantic import TypeAdapter
from llama_stack.apis.common.errors import ModelNotFoundError, VectorStoreNotFoundError
from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError, VectorStoreNotFoundError
from llama_stack.apis.models import ModelType
from llama_stack.apis.resource import ResourceType
from llama_stack.apis.vector_dbs import ListVectorDBsResponse, VectorDB, VectorDBs
@@ -66,7 +66,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs):
if model is None:
raise ModelNotFoundError(embedding_model)
if model.model_type != ModelType.embedding:
raise ValueError(f"Model {embedding_model} is not an embedding model")
raise ModelTypeError(embedding_model, model.model_type, ModelType.embedding)
if "embedding_dimension" not in model.metadata:
raise ValueError(f"Model {embedding_model} does not have an embedding dimension")
vector_db_data = {

View file

@@ -99,7 +99,8 @@ def parse_environment_config(env_config: str) -> dict[str, int]:
Dict[str, int]: A dictionary mapping categories to their log levels.
"""
category_levels = {}
for pair in env_config.split(";"):
delimiter = ","
for pair in env_config.split(delimiter):
if not pair.strip():
continue
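
A small illustration (not from the diff) of what the delimiter change means for the category/level string this function parses; the variable names below are only for the sketch:

# old, semicolon-delimited form:  "server=debug;core=info"
# new, comma-delimited form:      "server=debug,core=info"
env_config = "server=debug,core=info"

delimiter = ","
pairs = [pair for pair in env_config.split(delimiter) if pair.strip()]
# -> ["server=debug", "core=info"]; each pair is then split into a category and its log level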

View file

@@ -15,6 +15,7 @@ from llama_stack.apis.safety import (
RunShieldResponse,
Safety,
SafetyViolation,
ShieldStore,
ViolationLevel,
)
from llama_stack.apis.shields import Shield
@@ -32,6 +33,8 @@ PROMPT_GUARD_MODEL = "Prompt-Guard-86M"
class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
shield_store: ShieldStore
def __init__(self, config: PromptGuardConfig, _deps) -> None:
self.config = config
@@ -53,7 +56,7 @@ class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
self,
shield_id: str,
messages: list[Message],
params: dict[str, Any] = None,
params: dict[str, Any],
) -> RunShieldResponse:
shield = await self.shield_store.get_shield(shield_id)
if not shield:
@@ -61,6 +64,9 @@ class PromptGuardSafetyImpl(Safety, ShieldsProtocolPrivate):
return await self.shield.run(messages)
async def run_moderation(self, input: str | list[str], model: str):
raise NotImplementedError("run_moderation not implemented for PromptGuard")
class PromptGuardShield:
def __init__(
@@ -117,8 +123,10 @@ class PromptGuardShield:
elif self.config.guard_type == PromptGuardType.jailbreak.value and score_malicious > self.threshold:
violation = SafetyViolation(
violation_level=ViolationLevel.ERROR,
violation_type=f"prompt_injection:malicious={score_malicious}",
violation_return_message="Sorry, I cannot do this.",
user_message="Sorry, I cannot do this.",
metadata={
"violation_type": f"prompt_injection:malicious={score_malicious}",
},
)
return RunShieldResponse(violation=violation)
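
A hedged sketch (not part of the diff) of the violation shape after this change: the user-facing text now lives in user_message and the violation type is carried in metadata; the malicious score below is illustrative:

from llama_stack.apis.safety import RunShieldResponse, SafetyViolation, ViolationLevel

violation = SafetyViolation(
    violation_level=ViolationLevel.ERROR,
    user_message="Sorry, I cannot do this.",
    metadata={"violation_type": "prompt_injection:malicious=0.97"},  # score is made up
)
response = RunShieldResponse(violation=violation)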

View file

@@ -457,9 +457,6 @@ class OllamaInferenceAdapter(
user: str | None = None,
) -> OpenAIEmbeddingsResponse:
model_obj = await self._get_model(model)
if model_obj.model_type != ModelType.embedding:
raise ValueError(f"Model {model} is not an embedding model")
if model_obj.provider_resource_id is None:
raise ValueError(f"Model {model} has no provider_resource_id set")

View file

@@ -70,7 +70,7 @@ from openai.types.chat.chat_completion_chunk import (
from openai.types.chat.chat_completion_content_part_image_param import (
ImageURL as OpenAIImageURL,
)
from openai.types.chat.chat_completion_message_tool_call_param import (
from openai.types.chat.chat_completion_message_tool_call import (
Function as OpenAIFunction,
)
from pydantic import BaseModel

View file

@@ -175,7 +175,7 @@ const handleSubmitWithContent = async (content: string) => {
return (
<div className="flex flex-col h-full max-w-4xl mx-auto">
<div className="mb-4 flex justify-between items-center">
<h1 className="text-2xl font-bold">Chat Playground</h1>
<h1 className="text-2xl font-bold">Chat Playground (Completions)</h1>
<div className="flex gap-2">
<Select value={selectedModel} onValueChange={setSelectedModel} disabled={isModelsLoading || isGenerating}>
<SelectTrigger className="w-[180px]">

View file

@@ -6,6 +6,8 @@ import {
MoveUpRight,
Database,
MessageCircle,
Settings2,
Compass,
} from "lucide-react";
import Link from "next/link";
import { usePathname } from "next/navigation";
@@ -22,15 +24,16 @@ import {
SidebarMenuItem,
SidebarHeader,
} from "@/components/ui/sidebar";
// Extracted Chat Playground item
const chatPlaygroundItem = {
title: "Chat Playground",
url: "/chat-playground",
icon: MessageCircle,
};
// Removed Chat Playground from log items
const logItems = [
const createItems = [
{
title: "Chat Playground",
url: "/chat-playground",
icon: MessageCircle,
},
];
const manageItems = [
{
title: "Chat Completions",
url: "/logs/chat-completions",
@@ -53,77 +56,96 @@ },
},
];
const optimizeItems: { title: string; url: string; icon: React.ElementType }[] = [
{
title: "Evaluations",
url: "",
icon: Compass,
},
{
title: "Fine-tuning",
url: "",
icon: Settings2,
},
];
interface SidebarItem {
title: string;
url: string;
icon: React.ElementType;
}
export function AppSidebar() {
const pathname = usePathname();
return (
<Sidebar>
<SidebarHeader>
<Link href="/">Llama Stack</Link>
</SidebarHeader>
<SidebarContent>
{/* Chat Playground as its own section */}
<SidebarGroup>
<SidebarGroupContent>
<SidebarMenu>
<SidebarMenuItem>
const renderSidebarItems = (items: SidebarItem[]) => {
return items.map((item) => {
const isActive = pathname.startsWith(item.url);
return (
<SidebarMenuItem key={item.title}>
<SidebarMenuButton
asChild
className={cn(
"justify-start",
isActive &&
"bg-gray-200 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-700 text-gray-900 dark:text-gray-100",
)}
>
<Link href={item.url}>
<item.icon
className={cn(
isActive && "text-gray-900 dark:text-gray-100",
"mr-2 h-4 w-4",
)}
/>
<span>{item.title}</span>
</Link>
</SidebarMenuButton>
</SidebarMenuItem>
);
});
};
return (
<Sidebar>
<SidebarHeader>
<Link href="/">Llama Stack</Link>
</SidebarHeader>
<SidebarContent>
<SidebarGroup>
<SidebarGroupLabel>Create</SidebarGroupLabel>
<SidebarGroupContent>
<SidebarMenu>{renderSidebarItems(createItems)}</SidebarMenu>
</SidebarGroupContent>
</SidebarGroup>
<SidebarGroup>
<SidebarGroupLabel>Manage</SidebarGroupLabel>
<SidebarGroupContent>
<SidebarMenu>{renderSidebarItems(manageItems)}</SidebarMenu>
</SidebarGroupContent>
</SidebarGroup>
<SidebarGroup>
<SidebarGroupLabel>Optimize</SidebarGroupLabel>
<SidebarGroupContent>
<SidebarMenu>
{optimizeItems.map((item) => (
<SidebarMenuItem key={item.title}>
<SidebarMenuButton
asChild
className={cn(
"justify-start",
pathname.startsWith(chatPlaygroundItem.url) &&
"bg-gray-200 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-700 text-gray-900 dark:text-gray-100",
)}
disabled
className="justify-start opacity-60 cursor-not-allowed"
>
<Link href={chatPlaygroundItem.url}>
<chatPlaygroundItem.icon
className={cn(
pathname.startsWith(chatPlaygroundItem.url) && "text-gray-900 dark:text-gray-100",
"mr-2 h-4 w-4",
)}
/>
<span>{chatPlaygroundItem.title}</span>
</Link>
<item.icon className="mr-2 h-4 w-4" />
<span>{item.title}</span>
<span className="ml-2 text-xs text-gray-500">(Coming Soon)</span>
</SidebarMenuButton>
</SidebarMenuItem>
</SidebarMenu>
</SidebarGroupContent>
</SidebarGroup>
{/* Logs section */}
<SidebarGroup>
<SidebarGroupLabel>Logs</SidebarGroupLabel>
<SidebarGroupContent>
<SidebarMenu>
{logItems.map((item) => {
const isActive = pathname.startsWith(item.url);
return (
<SidebarMenuItem key={item.title}>
<SidebarMenuButton
asChild
className={cn(
"justify-start",
isActive &&
"bg-gray-200 dark:bg-gray-700 hover:bg-gray-200 dark:hover:bg-gray-700 text-gray-900 dark:text-gray-100",
)}
>
<Link href={item.url}>
<item.icon
className={cn(
isActive && "text-gray-900 dark:text-gray-100",
"mr-2 h-4 w-4",
)}
/>
<span>{item.title}</span>
</Link>
</SidebarMenuButton>
</SidebarMenuItem>
);
})}
</SidebarMenu>
</SidebarGroupContent>
</SidebarGroup>
</SidebarContent>
</Sidebar>
))}
</SidebarMenu>
</SidebarGroupContent>
</SidebarGroup>
</SidebarContent>
</Sidebar>
);
}