From 7ec2d955ee5801a2ddc17126edd5990efbfe5c49 Mon Sep 17 00:00:00 2001 From: Sixian Yi Date: Tue, 21 Jan 2025 22:59:49 -0800 Subject: [PATCH] add cerebras and ollama --- llama_stack/templates/cerebras/report.md | 45 +++++++++++++++++++++++ llama_stack/templates/fireworks/report.md | 6 +-- llama_stack/templates/ollama/report.md | 45 +++++++++++++++++++++++ llama_stack/templates/tgi/report.md | 45 +++++++++++++++++++++++ tests/client-sdk/conftest.py | 1 + tests/client-sdk/report.py | 20 ++++++++++ 6 files changed, 159 insertions(+), 3 deletions(-) create mode 100644 llama_stack/templates/cerebras/report.md create mode 100644 llama_stack/templates/ollama/report.md create mode 100644 llama_stack/templates/tgi/report.md diff --git a/llama_stack/templates/cerebras/report.md b/llama_stack/templates/cerebras/report.md new file mode 100644 index 000000000..b6117d437 --- /dev/null +++ b/llama_stack/templates/cerebras/report.md @@ -0,0 +1,45 @@ +# Report for cerebras distribution + +## Supported Models: +| Model Descriptor | cerebras | +|:---|:---| +| Llama-3-8B-Instruct | ❌ | +| Llama-3-70B-Instruct | ❌ | +| Llama3.1-8B-Instruct | ✅ | +| Llama3.1-70B-Instruct | ❌ | +| Llama3.1-405B-Instruct | ❌ | +| Llama3.2-1B-Instruct | ❌ | +| Llama3.2-3B-Instruct | ❌ | +| Llama3.2-11B-Vision-Instruct | ❌ | +| Llama3.2-90B-Vision-Instruct | ❌ | +| Llama3.3-70B-Instruct | ✅ | +| Llama-Guard-3-11B-Vision | ❌ | +| Llama-Guard-3-1B | ❌ | +| Llama-Guard-3-8B | ❌ | +| Llama-Guard-2-8B | ❌ | + +## Inference: +| Model | API | Capability | Test | Status | +|:----- |:-----|:-----|:-----|:-----| +| Text | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ | +| Vision | /chat_completion | streaming | test_image_chat_completion_streaming | ⏭️ | +| Vision | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ⏭️ | +| Text | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ | +| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ | +| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ | +| Text | /completion | streaming | test_text_completion_streaming | ✅ | +| Text | /completion | non_streaming | test_text_completion_non_streaming | ✅ | +| Text | /completion | structured_output | test_text_completion_structured_output | ❌ | + +## Memory: +| API | Capability | Test | Status | +|:-----|:-----|:-----|:-----| +| /insert, /query | inline | test_memory_bank_insert_inline_and_query | ✅ | +| /insert, /query | url | test_memory_bank_insert_from_url_and_query | ✅ | + +## Agents: +| API | Capability | Test | Status | +|:-----|:-----|:-----|:-----| +| create_agent_turn | rag | test_rag_agent | ✅ | +| create_agent_turn | custom_tool | test_custom_tool | ✅ | +| create_agent_turn | code_execution | test_code_execution | ✅ | diff --git a/llama_stack/templates/fireworks/report.md b/llama_stack/templates/fireworks/report.md index ac6fab6eb..5dd3513b5 100644 --- a/llama_stack/templates/fireworks/report.md +++ b/llama_stack/templates/fireworks/report.md @@ -34,12 +34,12 @@ ## Memory: | API | Capability | Test | Status | |:-----|:-----|:-----|:-----| -| /insert, /query | inline | test_memory_bank_insert_inline_and_query | ❌ | -| /insert, /query | url | test_memory_bank_insert_from_url_and_query | ❌ | +| /insert, /query | inline | test_memory_bank_insert_inline_and_query | ✅ | +| /insert, /query | url | test_memory_bank_insert_from_url_and_query | ✅ | ## Agents: | API | Capability | Test | Status | |:-----|:-----|:-----|:-----| | create_agent_turn | rag | test_rag_agent | ❌ | -| create_agent_turn | custom_tool | test_custom_tool | ✅ | +| create_agent_turn | custom_tool | test_custom_tool | ❌ | | create_agent_turn | code_execution | test_code_execution | ❌ | diff --git a/llama_stack/templates/ollama/report.md b/llama_stack/templates/ollama/report.md new file mode 100644 index 000000000..b18a7a751 --- /dev/null +++ b/llama_stack/templates/ollama/report.md @@ -0,0 +1,45 @@ +# Report for ollama distribution + +## Supported Models: +| Model Descriptor | ollama | +|:---|:---| +| Llama-3-8B-Instruct | ❌ | +| Llama-3-70B-Instruct | ❌ | +| Llama3.1-8B-Instruct | ✅ | +| Llama3.1-70B-Instruct | ✅ | +| Llama3.1-405B-Instruct | ✅ | +| Llama3.2-1B-Instruct | ✅ | +| Llama3.2-3B-Instruct | ✅ | +| Llama3.2-11B-Vision-Instruct | ✅ | +| Llama3.2-90B-Vision-Instruct | ✅ | +| Llama3.3-70B-Instruct | ✅ | +| Llama-Guard-3-11B-Vision | ❌ | +| Llama-Guard-3-1B | ✅ | +| Llama-Guard-3-8B | ✅ | +| Llama-Guard-2-8B | ❌ | + +## Inference: +| Model | API | Capability | Test | Status | +|:----- |:-----|:-----|:-----|:-----| +| Text | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ | +| Vision | /chat_completion | streaming | test_image_chat_completion_streaming | ⏭️ | +| Vision | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ⏭️ | +| Text | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ | +| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ | +| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ | +| Text | /completion | streaming | test_text_completion_streaming | ✅ | +| Text | /completion | non_streaming | test_text_completion_non_streaming | ✅ | +| Text | /completion | structured_output | test_text_completion_structured_output | ✅ | + +## Memory: +| API | Capability | Test | Status | +|:-----|:-----|:-----|:-----| +| /insert, /query | inline | test_memory_bank_insert_inline_and_query | ✅ | +| /insert, /query | url | test_memory_bank_insert_from_url_and_query | ✅ | + +## Agents: +| API | Capability | Test | Status | +|:-----|:-----|:-----|:-----| +| create_agent_turn | rag | test_rag_agent | ✅ | +| create_agent_turn | custom_tool | test_custom_tool | ❌ | +| create_agent_turn | code_execution | test_code_execution | ✅ | diff --git a/llama_stack/templates/tgi/report.md b/llama_stack/templates/tgi/report.md new file mode 100644 index 000000000..cc69923de --- /dev/null +++ b/llama_stack/templates/tgi/report.md @@ -0,0 +1,45 @@ +# Report for tgi distribution + +## Supported Models: +| Model Descriptor | tgi | +|:---|:---| +| Llama-3-8B-Instruct | ✅ | +| Llama-3-70B-Instruct | ✅ | +| Llama3.1-8B-Instruct | ✅ | +| Llama3.1-70B-Instruct | ✅ | +| Llama3.1-405B-Instruct | ✅ | +| Llama3.2-1B-Instruct | ✅ | +| Llama3.2-3B-Instruct | ✅ | +| Llama3.2-11B-Vision-Instruct | ✅ | +| Llama3.2-90B-Vision-Instruct | ✅ | +| Llama3.3-70B-Instruct | ✅ | +| Llama-Guard-3-11B-Vision | ✅ | +| Llama-Guard-3-1B | ✅ | +| Llama-Guard-3-8B | ✅ | +| Llama-Guard-2-8B | ✅ | + +## Inference: +| Model | API | Capability | Test | Status | +|:----- |:-----|:-----|:-----|:-----| +| Text | /chat_completion | streaming | test_text_chat_completion_streaming | ✅ | +| Vision | /chat_completion | streaming | test_image_chat_completion_streaming | ⏭️ | +| Vision | /chat_completion | non_streaming | test_image_chat_completion_non_streaming | ⏭️ | +| Text | /chat_completion | non_streaming | test_text_chat_completion_non_streaming | ✅ | +| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_streaming | ✅ | +| Text | /chat_completion | tool_calling | test_text_chat_completion_with_tool_calling_and_non_streaming | ✅ | +| Text | /completion | streaming | test_text_completion_streaming | ✅ | +| Text | /completion | non_streaming | test_text_completion_non_streaming | ✅ | +| Text | /completion | structured_output | test_text_completion_structured_output | ✅ | + +## Memory: +| API | Capability | Test | Status | +|:-----|:-----|:-----|:-----| +| /insert, /query | inline | test_memory_bank_insert_inline_and_query | ✅ | +| /insert, /query | url | test_memory_bank_insert_from_url_and_query | ✅ | + +## Agents: +| API | Capability | Test | Status | +|:-----|:-----|:-----|:-----| +| create_agent_turn | rag | test_rag_agent | ✅ | +| create_agent_turn | custom_tool | test_custom_tool | ❌ | +| create_agent_turn | code_execution | test_code_execution | ✅ | diff --git a/tests/client-sdk/conftest.py b/tests/client-sdk/conftest.py index 0b5324c0e..c19546887 100644 --- a/tests/client-sdk/conftest.py +++ b/tests/client-sdk/conftest.py @@ -32,6 +32,7 @@ def pytest_addoption(parser): TEXT_MODEL = "meta-llama/Llama-3.1-8B-Instruct" INFERENCE_MODEL = "meta-llama/Llama-3.2-11B-Vision-Instruct" + @pytest.fixture(scope="session") def provider_data(): # check env for tavily secret, brave secret and inject all into provider data diff --git a/tests/client-sdk/report.py b/tests/client-sdk/report.py index 22aa98935..f4aba52e8 100644 --- a/tests/client-sdk/report.py +++ b/tests/client-sdk/report.py @@ -68,6 +68,26 @@ SUPPORTED_MODELS = { CoreModelId.llama_guard_3_11b_vision.value, ] ), + "tgi": set( + [ + model.core_model_id.value + for model in all_registered_models() + if model.huggingface_repo + ] + ), + "vllm": set( + [ + model.core_model_id.value + for model in all_registered_models() + if model.huggingface_repo + ] + ), + "cerebras": set( + [ + CoreModelId.llama3_1_8b_instruct.value, + CoreModelId.llama3_3_70b_instruct.value, + ] + ), }