From 7ec2d955ee5801a2ddc17126edd5990efbfe5c49 Mon Sep 17 00:00:00 2001
From: Sixian Yi <sxyi@meta.com>
Date: Tue, 21 Jan 2025 22:59:49 -0800
Subject: [PATCH] add cerebras, ollama and tgi reports

---
 llama_stack/templates/cerebras/report.md  | 45 +++++++++++++++++++++++
 llama_stack/templates/fireworks/report.md |  6 +--
 llama_stack/templates/ollama/report.md    | 45 +++++++++++++++++++++++
 llama_stack/templates/tgi/report.md       | 45 +++++++++++++++++++++++
 tests/client-sdk/conftest.py              |  1 +
 tests/client-sdk/report.py                | 20 ++++++++++
 6 files changed, 159 insertions(+), 3 deletions(-)
 create mode 100644 llama_stack/templates/cerebras/report.md
 create mode 100644 llama_stack/templates/ollama/report.md
 create mode 100644 llama_stack/templates/tgi/report.md

diff --git a/llama_stack/templates/cerebras/report.md b/llama_stack/templates/cerebras/report.md
new file mode 100644
index 000000000..b6117d437
--- /dev/null
+++ b/llama_stack/templates/cerebras/report.md
@@ -0,0 +1,45 @@
+# Report for cerebras distribution
+
+## Supported Models:
+| Model Descriptor | cerebras |
+|:---|:---|
+| Llama-3-8B-Instruct | ❌ |
+| Llama-3-70B-Instruct | ❌ |
+| Llama3.1-8B-Instruct | ✅ |
+| Llama3.1-70B-Instruct | ❌ |
+| Llama3.1-405B-Instruct | ❌ |
+| Llama3.2-1B-Instruct | ❌ |
+| Llama3.2-3B-Instruct | ❌ |
+| Llama3.2-11B-Vision-Instruct | ❌ |
+| Llama3.2-90B-Vision-Instruct | ❌ |
+| Llama3.3-70B-Instruct | ✅ |
+| Llama-Guard-3-11B-Vision | ❌ |
+| Llama-Guard-3-1B | ❌ |
+| Llama-Guard-3-8B | ❌ |
+| Llama-Guard-2-8B | ❌ |
+
+## Inference:
+| Model | API | Capability | Test | Status |
+|:----- |:-----|:-----|:-----|:-----|
+| Text | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ |
+| Vision | /chat_completion | streaming | test_image_chat_completion_streaming | ⏭️ |
+| Vision | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ⏭️ |
+| Text | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ |
+| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ |
+| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ |
+| Text | /completion | streaming | test_text_completion_streaming | ✅ |
+| Text | /completion | non_streaming | test_text_completion_non_streaming | ✅ |
+| Text | /completion | structured_output | test_text_completion_structured_output | ❌ |
+
+## Memory:
+| API | Capability | Test | Status |
+|:-----|:-----|:-----|:-----|
+| /insert, /query | inline | test_memory_bank_insert_inline_and_query | ✅ |
+| /insert, /query | url | test_memory_bank_insert_from_url_and_query | ✅ |
+
+## Agents:
+| API | Capability | Test | Status |
+|:-----|:-----|:-----|:-----|
+| create_agent_turn | rag | test_rag_agent | ✅ |
+| create_agent_turn | custom_tool | test_custom_tool | ✅ |
+| create_agent_turn | code_execution | test_code_execution | ✅ |
diff --git a/llama_stack/templates/fireworks/report.md b/llama_stack/templates/fireworks/report.md
index ac6fab6eb..5dd3513b5 100644
--- a/llama_stack/templates/fireworks/report.md
+++ b/llama_stack/templates/fireworks/report.md
@@ -34,12 +34,12 @@
 ## Memory:
 | API | Capability | Test | Status |
 |:-----|:-----|:-----|:-----|
-| /insert, /query | inline | test_memory_bank_insert_inline_and_query | ❌ |
-| /insert, /query | url | test_memory_bank_insert_from_url_and_query | ❌ |
+| /insert, /query | inline | test_memory_bank_insert_inline_and_query | ✅ |
+| /insert, /query | url | test_memory_bank_insert_from_url_and_query | ✅ |
 
 ## Agents:
 | API | Capability | Test | Status |
 |:-----|:-----|:-----|:-----|
 | create_agent_turn | rag | test_rag_agent | ❌ |
-| create_agent_turn | custom_tool | test_custom_tool | ✅ |
+| create_agent_turn | custom_tool | test_custom_tool | ❌ |
 | create_agent_turn | code_execution | test_code_execution | ❌ |
diff --git a/llama_stack/templates/ollama/report.md b/llama_stack/templates/ollama/report.md
new file mode 100644
index 000000000..b18a7a751
--- /dev/null
+++ b/llama_stack/templates/ollama/report.md
@@ -0,0 +1,45 @@
+# Report for ollama distribution
+
+## Supported Models:
+| Model Descriptor | ollama |
+|:---|:---|
+| Llama-3-8B-Instruct | ❌ |
+| Llama-3-70B-Instruct | ❌ |
+| Llama3.1-8B-Instruct | ✅ |
+| Llama3.1-70B-Instruct | ✅ |
+| Llama3.1-405B-Instruct | ✅ |
+| Llama3.2-1B-Instruct | ✅ |
+| Llama3.2-3B-Instruct | ✅ |
+| Llama3.2-11B-Vision-Instruct | ✅ |
+| Llama3.2-90B-Vision-Instruct | ✅ |
+| Llama3.3-70B-Instruct | ✅ |
+| Llama-Guard-3-11B-Vision | ❌ |
+| Llama-Guard-3-1B | ✅ |
+| Llama-Guard-3-8B | ✅ |
+| Llama-Guard-2-8B | ❌ |
+
+## Inference:
+| Model | API | Capability | Test | Status |
+|:----- |:-----|:-----|:-----|:-----|
+| Text | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ |
+| Vision | /chat_completion | streaming | test_image_chat_completion_streaming | ⏭️ |
+| Vision | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ⏭️ |
+| Text | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ |
+| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ |
+| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ |
+| Text | /completion | streaming | test_text_completion_streaming | ✅ |
+| Text | /completion | non_streaming | test_text_completion_non_streaming | ✅ |
+| Text | /completion | structured_output | test_text_completion_structured_output | ✅ |
+
+## Memory:
+| API | Capability | Test | Status |
+|:-----|:-----|:-----|:-----|
+| /insert, /query | inline | test_memory_bank_insert_inline_and_query | ✅ |
+| /insert, /query | url | test_memory_bank_insert_from_url_and_query | ✅ |
+
+## Agents:
+| API | Capability | Test | Status |
+|:-----|:-----|:-----|:-----|
+| create_agent_turn | rag | test_rag_agent | ✅ |
+| create_agent_turn | custom_tool | test_custom_tool | ❌ |
+| create_agent_turn | code_execution | test_code_execution | ✅ |
diff --git a/llama_stack/templates/tgi/report.md b/llama_stack/templates/tgi/report.md
new file mode 100644
index 000000000..cc69923de
--- /dev/null
+++ b/llama_stack/templates/tgi/report.md
@@ -0,0 +1,45 @@
+# Report for tgi distribution
+
+## Supported Models:
+| Model Descriptor | tgi |
+|:---|:---|
+| Llama-3-8B-Instruct | ✅ |
+| Llama-3-70B-Instruct | ✅ |
+| Llama3.1-8B-Instruct | ✅ |
+| Llama3.1-70B-Instruct | ✅ |
+| Llama3.1-405B-Instruct | ✅ |
+| Llama3.2-1B-Instruct | ✅ |
+| Llama3.2-3B-Instruct | ✅ |
+| Llama3.2-11B-Vision-Instruct | ✅ |
+| Llama3.2-90B-Vision-Instruct | ✅ |
+| Llama3.3-70B-Instruct | ✅ |
+| Llama-Guard-3-11B-Vision | ✅ |
+| Llama-Guard-3-1B | ✅ |
+| Llama-Guard-3-8B | ✅ |
+| Llama-Guard-2-8B | ✅ |
+
+## Inference:
+| Model | API | Capability | Test | Status |
+|:----- |:-----|:-----|:-----|:-----|
+| Text | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ |
+| Vision | /chat_completion | streaming | test_image_chat_completion_streaming | ⏭️ |
+| Vision | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ⏭️ |
+| Text | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ |
+| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ |
+| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ |
+| Text | /completion | streaming | test_text_completion_streaming | ✅ |
+| Text | /completion | non_streaming | test_text_completion_non_streaming | ✅ |
+| Text | /completion | structured_output | test_text_completion_structured_output | ✅ |
+
+## Memory:
+| API | Capability | Test | Status |
+|:-----|:-----|:-----|:-----|
+| /insert, /query | inline | test_memory_bank_insert_inline_and_query | ✅ |
+| /insert, /query | url | test_memory_bank_insert_from_url_and_query | ✅ |
+
+## Agents:
+| API | Capability | Test | Status |
+|:-----|:-----|:-----|:-----|
+| create_agent_turn | rag | test_rag_agent | ✅ |
+| create_agent_turn | custom_tool | test_custom_tool | ❌ |
+| create_agent_turn | code_execution | test_code_execution | ✅ |
diff --git a/tests/client-sdk/conftest.py b/tests/client-sdk/conftest.py
index 0b5324c0e..c19546887 100644
--- a/tests/client-sdk/conftest.py
+++ b/tests/client-sdk/conftest.py
@@ -32,6 +32,7 @@ def pytest_addoption(parser):
 TEXT_MODEL = "meta-llama/Llama-3.1-8B-Instruct"
 INFERENCE_MODEL = "meta-llama/Llama-3.2-11B-Vision-Instruct"
 
+
 @pytest.fixture(scope="session")
 def provider_data():
     # check env for tavily secret, brave secret and inject all into provider data
diff --git a/tests/client-sdk/report.py b/tests/client-sdk/report.py
index 22aa98935..f4aba52e8 100644
--- a/tests/client-sdk/report.py
+++ b/tests/client-sdk/report.py
@@ -68,6 +68,26 @@ SUPPORTED_MODELS = {
             CoreModelId.llama_guard_3_11b_vision.value,
         ]
     ),
+    "tgi": set(
+        [
+            model.core_model_id.value
+            for model in all_registered_models()
+            if model.huggingface_repo
+        ]
+    ),
+    "vllm": set(
+        [
+            model.core_model_id.value
+            for model in all_registered_models()
+            if model.huggingface_repo
+        ]
+    ),
+    "cerebras": set(
+        [
+            CoreModelId.llama3_1_8b_instruct.value,
+            CoreModelId.llama3_3_70b_instruct.value,
+        ]
+    ),
 }