feat: Updating files/content response to return additional fields

Signed-off-by: Francisco Javier Arceo <farceo@redhat.com>
2025-10-05 20:27:35 +00:00 · 2025-08-06 16:55:14 -04:00 · 2025-08-06 16:55:14 -04:00 · a19c16428f
commit a19c16428f
parent e12524af85
143 changed files with 6907 additions and 15104 deletions
--- a/llama_stack/providers/remote/inference/gemini/models.py
+++ b/llama_stack/providers/remote/inference/gemini/models.py
@@ -13,7 +13,9 @@ LLM_MODEL_IDS = [
    "gemini-1.5-flash",
    "gemini-1.5-pro",
    "gemini-2.0-flash",
+    "gemini-2.0-flash-lite",
    "gemini-2.5-flash",
+    "gemini-2.5-flash-lite",
    "gemini-2.5-pro",
 ]

--- a/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
+++ b/llama_stack/providers/remote/inference/nvidia/NVIDIA.md
@@ -42,8 +42,8 @@ client.initialize()
 ### Create Completion

 ```python
-response = client.completion(
-    model_id="meta-llama/Llama-3.1-8b-Instruct",
+response = client.inference.completion(
+    model_id="meta-llama/Llama-3.1-8B-Instruct",
    content="Complete the sentence using one word: Roses are red, violets are :",
    stream=False,
    sampling_params={
@@ -56,8 +56,8 @@ print(f"Response: {response.content}")
 ### Create Chat Completion

 ```python
-response = client.chat_completion(
-    model_id="meta-llama/Llama-3.1-8b-Instruct",
+response = client.inference.chat_completion(
+    model_id="meta-llama/Llama-3.1-8B-Instruct",
    messages=[
        {
            "role": "system",
@@ -78,8 +78,10 @@ print(f"Response: {response.completion_message.content}")

 ### Create Embeddings
 ```python
-response = client.embeddings(
-    model_id="meta-llama/Llama-3.1-8b-Instruct", contents=["foo", "bar", "baz"]
+response = client.inference.embeddings(
+    model_id="nvidia/llama-3.2-nv-embedqa-1b-v2",
+    contents=["What is the capital of France?"],
+    task_type="query",
 )
 print(f"Embeddings: {response.embeddings}")
-```
+```
--- a/llama_stack/providers/remote/inference/ollama/ollama.py
+++ b/llama_stack/providers/remote/inference/ollama/ollama.py
@@ -112,7 +112,8 @@ class OllamaInferenceAdapter(
    @property
    def openai_client(self) -> AsyncOpenAI:
        if self._openai_client is None:
-            self._openai_client = AsyncOpenAI(base_url=f"{self.config.url}/v1", api_key="ollama")
+            url = self.config.url.rstrip("/")
+            self._openai_client = AsyncOpenAI(base_url=f"{url}/v1", api_key="ollama")
        return self._openai_client

    async def initialize(self) -> None: