From 83ede71e76704677ea96c5e29c6670e8c1a1fe92 Mon Sep 17 00:00:00 2001 From: Swapna Lekkala Date: Wed, 27 Aug 2025 15:36:57 -0700 Subject: [PATCH] Fix docker failing to start container --- docs/source/providers/agents/index.md | 12 ++++++------ docs/source/providers/inference/index.md | 6 +++--- llama_stack/providers/registry/inference.py | 2 +- .../providers/remote/inference/vertexai/vertexai.py | 13 ++++++++++++- 4 files changed, 22 insertions(+), 11 deletions(-) diff --git a/docs/source/providers/agents/index.md b/docs/source/providers/agents/index.md index a2c48d4b9..046db6bff 100644 --- a/docs/source/providers/agents/index.md +++ b/docs/source/providers/agents/index.md @@ -4,12 +4,12 @@ Agents API for creating and interacting with agentic systems. - Main functionalities provided by this API: - - Create agents with specific instructions and ability to use tools. - - Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn". - - Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details). - - Agents can be provided with various shields (see the Safety API for more details). - - Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details. +Main functionalities provided by this API: +- Create agents with specific instructions and ability to use tools. +- Interactions with agents are grouped into sessions ("threads"), and each interaction is called a "turn". +- Agents can be provided with various tools (see the ToolGroups and ToolRuntime APIs for more details). +- Agents can be provided with various shields (see the Safety API for more details). +- Agents can also use Memory to retrieve information from knowledge bases. See the RAG Tool and Vector IO APIs for more details. This section contains documentation for all available providers for the **agents** API. 
diff --git a/docs/source/providers/inference/index.md b/docs/source/providers/inference/index.md index b6d215474..291e8e525 100644 --- a/docs/source/providers/inference/index.md +++ b/docs/source/providers/inference/index.md @@ -4,9 +4,9 @@ Llama Stack Inference API for generating completions, chat completions, and embeddings. - This API provides the raw interface to the underlying models. Two kinds of models are supported: - - LLM models: these models generate "raw" and "chat" (conversational) completions. - - Embedding models: these models generate embeddings to be used for semantic search. +This API provides the raw interface to the underlying models. Two kinds of models are supported: +- LLM models: these models generate "raw" and "chat" (conversational) completions. +- Embedding models: these models generate embeddings to be used for semantic search. This section contains documentation for all available providers for the **inference** API. diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 82b771a28..096bc3045 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -116,7 +116,7 @@ def available_providers() -> list[ProviderSpec]: adapter=AdapterSpec( adapter_type="fireworks", pip_packages=[ - "fireworks-ai", + "fireworks-ai==0.17.16", ], module="llama_stack.providers.remote.inference.fireworks", config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig", diff --git a/llama_stack/providers/remote/inference/vertexai/vertexai.py b/llama_stack/providers/remote/inference/vertexai/vertexai.py index 8807fd0e6..f195977b6 100644 --- a/llama_stack/providers/remote/inference/vertexai/vertexai.py +++ b/llama_stack/providers/remote/inference/vertexai/vertexai.py @@ -6,7 +6,9 @@ from typing import Any -from llama_stack.apis.inference import ChatCompletionRequest +from openai.types.chat import ChatCompletionContentPartImageParam, ChatCompletionContentPartTextParam + +from llama_stack.apis.inference import ChatCompletionRequest, RerankResponse from llama_stack.providers.utils.inference.litellm_openai_mixin import ( LiteLLMOpenAIMixin, ) @@ -50,3 +52,12 @@ class VertexAIInferenceAdapter(LiteLLMOpenAIMixin): params.pop("api_key", None) return params + + async def rerank( + self, + model: str, + query: str | ChatCompletionContentPartTextParam | ChatCompletionContentPartImageParam, + items: list[str | ChatCompletionContentPartTextParam | ChatCompletionContentPartImageParam], + max_num_results: int | None = None, + ) -> RerankResponse: + raise NotImplementedError("Reranking is not supported for Vertex AI")