From 5f94ed56c566935a7b7b7fe254dfb380c67b50b6 Mon Sep 17 00:00:00 2001 From: anigasan Date: Thu, 3 Jul 2025 12:21:28 -0700 Subject: [PATCH 01/17] Changed config and tavily_search for tavily API --- .../remote/tool_runtime/tavily_search/config.py | 12 +++++++++++- .../tool_runtime/tavily_search/tavily_search.py | 6 +++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py index ca4e615db..28f7c3d3c 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py @@ -18,10 +18,20 @@ class TavilySearchToolConfig(BaseModel): default=3, description="The maximum number of results to return", ) + timeout: float = Field( + default=30.0, + description="HTTP request timeout in seconds", + ) + connect_timeout: float = Field( + default=10.0, + description="HTTP connection timeout in seconds", + ) @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "api_key": "${env.TAVILY_SEARCH_API_KEY:=}", + "api_key": "${env.TAVILY_SEARCH_API_KEY:+}", "max_results": 3, + "timeout": 30.0, + "connect_timeout": 10.0, } diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index 1fe91fd7f..e8ca36cf4 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -69,7 +69,11 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: api_key = self._get_api_key() - async with httpx.AsyncClient() as client: + + # Configure timeout for external API calls using config values + timeout = httpx.Timeout(timeout=self.config.timeout, connect=self.config.connect_timeout) + + async with httpx.AsyncClient(timeout=timeout) as client: response = await client.post( "https://api.tavily.com/search", json={"api_key": api_key, "query": kwargs["query"]}, From 7201bdaee44ca1c6fe09bb55f0a421221fa8867d Mon Sep 17 00:00:00 2001 From: anigasan Date: Fri, 4 Jul 2025 11:48:47 -0700 Subject: [PATCH 02/17] config file changes --- .../providers/remote/tool_runtime/tavily_search/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py index 28f7c3d3c..30d29d34b 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py @@ -30,7 +30,7 @@ class TavilySearchToolConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "api_key": "${env.TAVILY_SEARCH_API_KEY:+}", + "api_key": "${env.TAVILY_SEARCH_API_KEY:=}", "max_results": 3, "timeout": 30.0, "connect_timeout": 10.0, From ef332c296e08ae764d030bcf35c07e9e6421cbec Mon Sep 17 00:00:00 2001 From: anigasan Date: Sun, 6 Jul 2025 12:52:47 -0700 Subject: [PATCH 03/17] commits --- .../remote/tool_runtime/tavily_search/config.py | 6 +++--- .../remote/tool_runtime/tavily_search/tavily_search.py | 9 ++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py index 30d29d34b..ae7b41d50 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py @@ -20,11 +20,11 @@ class TavilySearchToolConfig(BaseModel): ) timeout: float = Field( default=30.0, - description="HTTP request timeout in seconds", + description="HTTP request timeout for the API", ) connect_timeout: float = Field( default=10.0, - description="HTTP connection timeout in seconds", + description="HTTP connection timeout in seconds for the API", ) @classmethod @@ -32,6 +32,6 @@ class TavilySearchToolConfig(BaseModel): return { "api_key": "${env.TAVILY_SEARCH_API_KEY:=}", "max_results": 3, - "timeout": 30.0, + "timeout:" 30.0, "connect_timeout": 10.0, } diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index e8ca36cf4..cdaf78d4a 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -69,11 +69,10 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: api_key = self._get_api_key() - - # Configure timeout for external API calls using config values - timeout = httpx.Timeout(timeout=self.config.timeout, connect=self.config.connect_timeout) - - async with httpx.AsyncClient(timeout=timeout) as client: + + timeout = https.Timeout(timeout=self.config.timeout, connect=self.config.connect_timeout) + + async with httpx.AsyncClient() as client: response = await client.post( "https://api.tavily.com/search", json={"api_key": api_key, "query": kwargs["query"]}, From e679cd02617404c3267e882a932b392bf71e3e3e Mon Sep 17 00:00:00 2001 From: anigasan Date: Sun, 6 Jul 2025 18:44:27 -0700 Subject: [PATCH 04/17] more commits --- docs/source/providers/tool_runtime/remote_tavily-search.md | 4 ++++ .../providers/remote/tool_runtime/tavily_search/config.py | 2 +- .../remote/tool_runtime/tavily_search/tavily_search.py | 2 +- llama_stack/templates/bedrock/run.yaml | 2 ++ llama_stack/templates/cerebras/run.yaml | 2 ++ llama_stack/templates/ci-tests/run.yaml | 2 ++ llama_stack/templates/dell/run-with-safety.yaml | 2 ++ llama_stack/templates/dell/run.yaml | 2 ++ llama_stack/templates/fireworks/run-with-safety.yaml | 2 ++ llama_stack/templates/fireworks/run.yaml | 2 ++ llama_stack/templates/groq/run.yaml | 2 ++ llama_stack/templates/hf-endpoint/run-with-safety.yaml | 2 ++ llama_stack/templates/hf-endpoint/run.yaml | 2 ++ llama_stack/templates/hf-serverless/run-with-safety.yaml | 2 ++ llama_stack/templates/hf-serverless/run.yaml | 2 ++ llama_stack/templates/llama_api/run.yaml | 2 ++ llama_stack/templates/meta-reference-gpu/run-with-safety.yaml | 2 ++ llama_stack/templates/meta-reference-gpu/run.yaml | 2 ++ llama_stack/templates/ollama/run-with-safety.yaml | 2 ++ llama_stack/templates/ollama/run.yaml | 2 ++ llama_stack/templates/open-benchmark/run.yaml | 2 ++ llama_stack/templates/passthrough/run-with-safety.yaml | 2 ++ llama_stack/templates/passthrough/run.yaml | 2 ++ llama_stack/templates/postgres-demo/run.yaml | 2 ++ llama_stack/templates/remote-vllm/run-with-safety.yaml | 2 ++ llama_stack/templates/remote-vllm/run.yaml | 2 ++ llama_stack/templates/sambanova/run.yaml | 2 ++ llama_stack/templates/starter/run.yaml | 2 ++ llama_stack/templates/tgi/run-with-safety.yaml | 2 ++ llama_stack/templates/tgi/run.yaml | 2 ++ llama_stack/templates/together/run-with-safety.yaml | 2 ++ llama_stack/templates/together/run.yaml | 2 ++ llama_stack/templates/vllm-gpu/run.yaml | 2 ++ llama_stack/templates/watsonx/run.yaml | 2 ++ 34 files changed, 68 insertions(+), 2 deletions(-) diff --git a/docs/source/providers/tool_runtime/remote_tavily-search.md b/docs/source/providers/tool_runtime/remote_tavily-search.md index 3dc31534d..73ee4af46 100644 --- a/docs/source/providers/tool_runtime/remote_tavily-search.md +++ b/docs/source/providers/tool_runtime/remote_tavily-search.md @@ -10,12 +10,16 @@ Tavily Search tool for AI-optimized web search with structured results. |-------|------|----------|---------|-------------| | `api_key` | `str \| None` | No | | The Tavily Search API Key | | `max_results` | `` | No | 3 | The maximum number of results to return | +| `timeout` | `` | No | 30.0 | HTTP request timeout for the API | +| `connect_timeout` | `` | No | 10.0 | HTTP connection timeout in seconds for the API | ## Sample Configuration ```yaml api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 +timeout: 30.0 +connect_timeout: 10.0 ``` diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py index ae7b41d50..ec409ab55 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py @@ -32,6 +32,6 @@ class TavilySearchToolConfig(BaseModel): return { "api_key": "${env.TAVILY_SEARCH_API_KEY:=}", "max_results": 3, - "timeout:" 30.0, + "timeout": 30.0, "connect_timeout": 10.0, } diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index cdaf78d4a..d89c9b101 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -70,7 +70,7 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: api_key = self._get_api_key() - timeout = https.Timeout(timeout=self.config.timeout, connect=self.config.connect_timeout) + httpx.Timeout(timeout=self.config.timeout, connect=self.config.connect_timeout) async with httpx.AsyncClient() as client: response = await client.post( diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml index 068278c66..632fa5b02 100644 --- a/llama_stack/templates/bedrock/run.yaml +++ b/llama_stack/templates/bedrock/run.yaml @@ -85,6 +85,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml index 305e9a20f..bda867555 100644 --- a/llama_stack/templates/cerebras/run.yaml +++ b/llama_stack/templates/cerebras/run.yaml @@ -91,6 +91,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml index 5a68af3e6..6501fb7e5 100644 --- a/llama_stack/templates/ci-tests/run.yaml +++ b/llama_stack/templates/ci-tests/run.yaml @@ -89,6 +89,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/dell/run-with-safety.yaml b/llama_stack/templates/dell/run-with-safety.yaml index 1e1ef1ea9..121087d8c 100644 --- a/llama_stack/templates/dell/run-with-safety.yaml +++ b/llama_stack/templates/dell/run-with-safety.yaml @@ -92,6 +92,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/dell/run.yaml b/llama_stack/templates/dell/run.yaml index 6f5c56dd3..de661bfda 100644 --- a/llama_stack/templates/dell/run.yaml +++ b/llama_stack/templates/dell/run.yaml @@ -88,6 +88,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml index 1233e2271..26212e57d 100644 --- a/llama_stack/templates/fireworks/run-with-safety.yaml +++ b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -105,6 +105,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml index 7f0bc49f5..38860cdb9 100644 --- a/llama_stack/templates/fireworks/run.yaml +++ b/llama_stack/templates/fireworks/run.yaml @@ -100,6 +100,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml index 351ca74f7..ecc533ad9 100644 --- a/llama_stack/templates/groq/run.yaml +++ b/llama_stack/templates/groq/run.yaml @@ -91,6 +91,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml index 63063ad91..550651cf8 100644 --- a/llama_stack/templates/hf-endpoint/run-with-safety.yaml +++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml @@ -96,6 +96,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml index 4caf0db04..77e7c8cfc 100644 --- a/llama_stack/templates/hf-endpoint/run.yaml +++ b/llama_stack/templates/hf-endpoint/run.yaml @@ -91,6 +91,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml index a4bba1f76..5344d4347 100644 --- a/llama_stack/templates/hf-serverless/run-with-safety.yaml +++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml @@ -96,6 +96,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml index 23e4c1f28..de7ba3a10 100644 --- a/llama_stack/templates/hf-serverless/run.yaml +++ b/llama_stack/templates/hf-serverless/run.yaml @@ -91,6 +91,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/llama_api/run.yaml b/llama_stack/templates/llama_api/run.yaml index 77bbcfbc8..83cf5d5b2 100644 --- a/llama_stack/templates/llama_api/run.yaml +++ b/llama_stack/templates/llama_api/run.yaml @@ -101,6 +101,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml index 2f5ee4062..d3600c8a1 100644 --- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml @@ -106,6 +106,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml index cc119bf4d..82ebd3c44 100644 --- a/llama_stack/templates/meta-reference-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-gpu/run.yaml @@ -96,6 +96,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml index 98db5fc98..2838d6069 100644 --- a/llama_stack/templates/ollama/run-with-safety.yaml +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -107,6 +107,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index 38fb2bace..129bf4912 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -105,6 +105,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/open-benchmark/run.yaml b/llama_stack/templates/open-benchmark/run.yaml index 7b1ef8f10..d9031b15f 100644 --- a/llama_stack/templates/open-benchmark/run.yaml +++ b/llama_stack/templates/open-benchmark/run.yaml @@ -115,6 +115,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/passthrough/run-with-safety.yaml b/llama_stack/templates/passthrough/run-with-safety.yaml index 5cd8a2930..e33009b6d 100644 --- a/llama_stack/templates/passthrough/run-with-safety.yaml +++ b/llama_stack/templates/passthrough/run-with-safety.yaml @@ -96,6 +96,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: diff --git a/llama_stack/templates/passthrough/run.yaml b/llama_stack/templates/passthrough/run.yaml index 5b6078953..3578b3ad8 100644 --- a/llama_stack/templates/passthrough/run.yaml +++ b/llama_stack/templates/passthrough/run.yaml @@ -91,6 +91,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: wolfram-alpha provider_type: remote::wolfram-alpha config: diff --git a/llama_stack/templates/postgres-demo/run.yaml b/llama_stack/templates/postgres-demo/run.yaml index 2b6b1a64f..ce3ea23bf 100644 --- a/llama_stack/templates/postgres-demo/run.yaml +++ b/llama_stack/templates/postgres-demo/run.yaml @@ -65,6 +65,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml index a8d30904d..0faee221d 100644 --- a/llama_stack/templates/remote-vllm/run-with-safety.yaml +++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml @@ -100,6 +100,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml index 58c4f867d..80da1f534 100644 --- a/llama_stack/templates/remote-vllm/run.yaml +++ b/llama_stack/templates/remote-vllm/run.yaml @@ -93,6 +93,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml index ab6c70ae0..39e6290e1 100644 --- a/llama_stack/templates/sambanova/run.yaml +++ b/llama_stack/templates/sambanova/run.yaml @@ -70,6 +70,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml index de8d35683..d2e129881 100644 --- a/llama_stack/templates/starter/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -162,6 +162,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml index c19b916d5..400b654e7 100644 --- a/llama_stack/templates/tgi/run-with-safety.yaml +++ b/llama_stack/templates/tgi/run-with-safety.yaml @@ -91,6 +91,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml index f0197d74c..b4636ddfb 100644 --- a/llama_stack/templates/tgi/run.yaml +++ b/llama_stack/templates/tgi/run.yaml @@ -90,6 +90,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml index b32c9ee8d..f6df14477 100644 --- a/llama_stack/templates/together/run-with-safety.yaml +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -96,6 +96,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml index 22c99f6cf..ef64cf8ee 100644 --- a/llama_stack/templates/together/run.yaml +++ b/llama_stack/templates/together/run.yaml @@ -91,6 +91,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml index 6d122e180..8f80a2dcb 100644 --- a/llama_stack/templates/vllm-gpu/run.yaml +++ b/llama_stack/templates/vllm-gpu/run.yaml @@ -95,6 +95,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/watsonx/run.yaml b/llama_stack/templates/watsonx/run.yaml index d80ee6329..8193c0ac6 100644 --- a/llama_stack/templates/watsonx/run.yaml +++ b/llama_stack/templates/watsonx/run.yaml @@ -92,6 +92,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} From 37f4d02392e7eb77ff56c1203297f1ff1deb52b2 Mon Sep 17 00:00:00 2001 From: anigasan Date: Mon, 7 Jul 2025 12:39:33 -0700 Subject: [PATCH 05/17] made changes to tavily_search, simplified config logic --- .../remote/tool_runtime/tavily_search/tavily_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index d89c9b101..5158f4d40 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -70,9 +70,9 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: api_key = self._get_api_key() - httpx.Timeout(timeout=self.config.timeout, connect=self.config.connect_timeout) + timeout = httpx.Timeout(timeout=self.config.timeout) - async with httpx.AsyncClient() as client: + async with httpx.AsyncClient(timeout=timeout) as client: response = await client.post( "https://api.tavily.com/search", json={"api_key": api_key, "query": kwargs["query"]}, From 2e57d1b7e6b80202ae5ac006c2bb14a14a9b230e Mon Sep 17 00:00:00 2001 From: anigasan Date: Mon, 7 Jul 2025 12:44:45 -0700 Subject: [PATCH 06/17] made changes to configuration --- .../providers/remote/tool_runtime/tavily_search/config.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py index ec409ab55..b4ddd9cb4 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py @@ -22,10 +22,6 @@ class TavilySearchToolConfig(BaseModel): default=30.0, description="HTTP request timeout for the API", ) - connect_timeout: float = Field( - default=10.0, - description="HTTP connection timeout in seconds for the API", - ) @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: @@ -33,5 +29,4 @@ class TavilySearchToolConfig(BaseModel): "api_key": "${env.TAVILY_SEARCH_API_KEY:=}", "max_results": 3, "timeout": 30.0, - "connect_timeout": 10.0, } From ef303c2a17f8a9c1708976b40feaf6f947ee5d9c Mon Sep 17 00:00:00 2001 From: anigasan Date: Thu, 3 Jul 2025 12:21:28 -0700 Subject: [PATCH 07/17] Changed config and tavily_search for tavily API --- .../remote/tool_runtime/tavily_search/config.py | 12 +++++++++++- .../tool_runtime/tavily_search/tavily_search.py | 6 +++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py index ca4e615db..28f7c3d3c 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py @@ -18,10 +18,20 @@ class TavilySearchToolConfig(BaseModel): default=3, description="The maximum number of results to return", ) + timeout: float = Field( + default=30.0, + description="HTTP request timeout in seconds", + ) + connect_timeout: float = Field( + default=10.0, + description="HTTP connection timeout in seconds", + ) @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "api_key": "${env.TAVILY_SEARCH_API_KEY:=}", + "api_key": "${env.TAVILY_SEARCH_API_KEY:+}", "max_results": 3, + "timeout": 30.0, + "connect_timeout": 10.0, } diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index 1fe91fd7f..e8ca36cf4 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -69,7 +69,11 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: api_key = self._get_api_key() - async with httpx.AsyncClient() as client: + + # Configure timeout for external API calls using config values + timeout = httpx.Timeout(timeout=self.config.timeout, connect=self.config.connect_timeout) + + async with httpx.AsyncClient(timeout=timeout) as client: response = await client.post( "https://api.tavily.com/search", json={"api_key": api_key, "query": kwargs["query"]}, From b78f21508cb084f505ea6c3df9df03bfbc5efd59 Mon Sep 17 00:00:00 2001 From: anigasan Date: Fri, 4 Jul 2025 11:48:47 -0700 Subject: [PATCH 08/17] config file changes --- .../providers/remote/tool_runtime/tavily_search/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py index 28f7c3d3c..30d29d34b 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py @@ -30,7 +30,7 @@ class TavilySearchToolConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "api_key": "${env.TAVILY_SEARCH_API_KEY:+}", + "api_key": "${env.TAVILY_SEARCH_API_KEY:=}", "max_results": 3, "timeout": 30.0, "connect_timeout": 10.0, From f916007788479cf1c63a590a56719d420e395303 Mon Sep 17 00:00:00 2001 From: anigasan Date: Sun, 6 Jul 2025 12:52:47 -0700 Subject: [PATCH 09/17] commits --- .../remote/tool_runtime/tavily_search/config.py | 6 +++--- .../remote/tool_runtime/tavily_search/tavily_search.py | 9 ++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py index 30d29d34b..ae7b41d50 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py @@ -20,11 +20,11 @@ class TavilySearchToolConfig(BaseModel): ) timeout: float = Field( default=30.0, - description="HTTP request timeout in seconds", + description="HTTP request timeout for the API", ) connect_timeout: float = Field( default=10.0, - description="HTTP connection timeout in seconds", + description="HTTP connection timeout in seconds for the API", ) @classmethod @@ -32,6 +32,6 @@ class TavilySearchToolConfig(BaseModel): return { "api_key": "${env.TAVILY_SEARCH_API_KEY:=}", "max_results": 3, - "timeout": 30.0, + "timeout:" 30.0, "connect_timeout": 10.0, } diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index e8ca36cf4..cdaf78d4a 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -69,11 +69,10 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: api_key = self._get_api_key() - - # Configure timeout for external API calls using config values - timeout = httpx.Timeout(timeout=self.config.timeout, connect=self.config.connect_timeout) - - async with httpx.AsyncClient(timeout=timeout) as client: + + timeout = https.Timeout(timeout=self.config.timeout, connect=self.config.connect_timeout) + + async with httpx.AsyncClient() as client: response = await client.post( "https://api.tavily.com/search", json={"api_key": api_key, "query": kwargs["query"]}, From f997a11ac12023e04672b2bdae61f9c6e313a0ae Mon Sep 17 00:00:00 2001 From: anigasan Date: Sun, 6 Jul 2025 18:44:27 -0700 Subject: [PATCH 10/17] more commits --- .../tool_runtime/remote_tavily-search.md | 4 + .../tool_runtime/tavily_search/config.py | 2 +- .../tavily_search/tavily_search.py | 2 +- llama_stack/templates/bedrock/run.yaml | 144 +++++++++ llama_stack/templates/cerebras/run.yaml | 142 +++++++++ llama_stack/templates/ci-tests/run.yaml | 241 +++++++++++++++ .../templates/dell/run-with-safety.yaml | 132 +++++++++ llama_stack/templates/dell/run.yaml | 123 ++++++++ .../templates/fireworks/run-with-safety.yaml | 268 +++++++++++++++++ llama_stack/templates/fireworks/run.yaml | 258 ++++++++++++++++ llama_stack/templates/groq/run.yaml | 207 +++++++++++++ .../hf-endpoint/run-with-safety.yaml | 139 +++++++++ llama_stack/templates/hf-endpoint/run.yaml | 129 ++++++++ .../hf-serverless/run-with-safety.yaml | 139 +++++++++ llama_stack/templates/hf-serverless/run.yaml | 129 ++++++++ llama_stack/templates/llama_api/run.yaml | 166 +++++++++++ .../meta-reference-gpu/run-with-safety.yaml | 2 + .../templates/meta-reference-gpu/run.yaml | 2 + .../templates/ollama/run-with-safety.yaml | 160 ++++++++++ llama_stack/templates/ollama/run.yaml | 150 ++++++++++ llama_stack/templates/open-benchmark/run.yaml | 2 + .../passthrough/run-with-safety.yaml | 152 ++++++++++ llama_stack/templates/passthrough/run.yaml | 142 +++++++++ llama_stack/templates/postgres-demo/run.yaml | 2 + .../remote-vllm/run-with-safety.yaml | 149 ++++++++++ llama_stack/templates/remote-vllm/run.yaml | 137 +++++++++ llama_stack/templates/sambanova/run.yaml | 214 ++++++++++++++ llama_stack/templates/starter/run.yaml | 2 + .../templates/tgi/run-with-safety.yaml | 129 ++++++++ llama_stack/templates/tgi/run.yaml | 128 ++++++++ .../templates/together/run-with-safety.yaml | 276 ++++++++++++++++++ llama_stack/templates/together/run.yaml | 266 +++++++++++++++++ llama_stack/templates/vllm-gpu/run.yaml | 2 + llama_stack/templates/watsonx/run.yaml | 2 + 34 files changed, 4140 insertions(+), 2 deletions(-) create mode 100644 llama_stack/templates/bedrock/run.yaml create mode 100644 llama_stack/templates/cerebras/run.yaml create mode 100644 llama_stack/templates/ci-tests/run.yaml create mode 100644 llama_stack/templates/dell/run-with-safety.yaml create mode 100644 llama_stack/templates/dell/run.yaml create mode 100644 llama_stack/templates/fireworks/run-with-safety.yaml create mode 100644 llama_stack/templates/fireworks/run.yaml create mode 100644 llama_stack/templates/groq/run.yaml create mode 100644 llama_stack/templates/hf-endpoint/run-with-safety.yaml create mode 100644 llama_stack/templates/hf-endpoint/run.yaml create mode 100644 llama_stack/templates/hf-serverless/run-with-safety.yaml create mode 100644 llama_stack/templates/hf-serverless/run.yaml create mode 100644 llama_stack/templates/llama_api/run.yaml create mode 100644 llama_stack/templates/ollama/run-with-safety.yaml create mode 100644 llama_stack/templates/ollama/run.yaml create mode 100644 llama_stack/templates/passthrough/run-with-safety.yaml create mode 100644 llama_stack/templates/passthrough/run.yaml create mode 100644 llama_stack/templates/remote-vllm/run-with-safety.yaml create mode 100644 llama_stack/templates/remote-vllm/run.yaml create mode 100644 llama_stack/templates/sambanova/run.yaml create mode 100644 llama_stack/templates/tgi/run-with-safety.yaml create mode 100644 llama_stack/templates/tgi/run.yaml create mode 100644 llama_stack/templates/together/run-with-safety.yaml create mode 100644 llama_stack/templates/together/run.yaml diff --git a/docs/source/providers/tool_runtime/remote_tavily-search.md b/docs/source/providers/tool_runtime/remote_tavily-search.md index 3dc31534d..73ee4af46 100644 --- a/docs/source/providers/tool_runtime/remote_tavily-search.md +++ b/docs/source/providers/tool_runtime/remote_tavily-search.md @@ -10,12 +10,16 @@ Tavily Search tool for AI-optimized web search with structured results. |-------|------|----------|---------|-------------| | `api_key` | `str \| None` | No | | The Tavily Search API Key | | `max_results` | `` | No | 3 | The maximum number of results to return | +| `timeout` | `` | No | 30.0 | HTTP request timeout for the API | +| `connect_timeout` | `` | No | 10.0 | HTTP connection timeout in seconds for the API | ## Sample Configuration ```yaml api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 +timeout: 30.0 +connect_timeout: 10.0 ``` diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py index ae7b41d50..ec409ab55 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py @@ -32,6 +32,6 @@ class TavilySearchToolConfig(BaseModel): return { "api_key": "${env.TAVILY_SEARCH_API_KEY:=}", "max_results": 3, - "timeout:" 30.0, + "timeout": 30.0, "connect_timeout": 10.0, } diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index cdaf78d4a..d89c9b101 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -70,7 +70,7 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: api_key = self._get_api_key() - timeout = https.Timeout(timeout=self.config.timeout, connect=self.config.connect_timeout) + httpx.Timeout(timeout=self.config.timeout, connect=self.config.connect_timeout) async with httpx.AsyncClient() as client: response = await client.post( diff --git a/llama_stack/templates/bedrock/run.yaml b/llama_stack/templates/bedrock/run.yaml new file mode 100644 index 000000000..632fa5b02 --- /dev/null +++ b/llama_stack/templates/bedrock/run.yaml @@ -0,0 +1,144 @@ +version: 2 +image_name: bedrock +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: bedrock + provider_type: remote::bedrock + config: {} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/faiss_store.db + safety: + - provider_id: bedrock + provider_type: remote::bedrock + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/bedrock}/inference_store.db +models: +- metadata: {} + model_id: meta.llama3-1-8b-instruct-v1:0 + provider_id: bedrock + provider_model_id: meta.llama3-1-8b-instruct-v1:0 + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: bedrock + provider_model_id: meta.llama3-1-8b-instruct-v1:0 + model_type: llm +- metadata: {} + model_id: meta.llama3-1-70b-instruct-v1:0 + provider_id: bedrock + provider_model_id: meta.llama3-1-70b-instruct-v1:0 + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-70B-Instruct + provider_id: bedrock + provider_model_id: meta.llama3-1-70b-instruct-v1:0 + model_type: llm +- metadata: {} + model_id: meta.llama3-1-405b-instruct-v1:0 + provider_id: bedrock + provider_model_id: meta.llama3-1-405b-instruct-v1:0 + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: bedrock + provider_model_id: meta.llama3-1-405b-instruct-v1:0 + model_type: llm +shields: [] +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/templates/cerebras/run.yaml b/llama_stack/templates/cerebras/run.yaml new file mode 100644 index 000000000..bda867555 --- /dev/null +++ b/llama_stack/templates/cerebras/run.yaml @@ -0,0 +1,142 @@ +version: 2 +image_name: cerebras +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: cerebras + provider_type: remote::cerebras + config: + base_url: https://api.cerebras.ai + api_key: ${env.CEREBRAS_API_KEY} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/faiss_store.db + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/responses_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/trace_store.db + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/cerebras}/inference_store.db +models: +- metadata: {} + model_id: llama3.1-8b + provider_id: cerebras + provider_model_id: llama3.1-8b + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: cerebras + provider_model_id: llama3.1-8b + model_type: llm +- metadata: {} + model_id: llama-3.3-70b + provider_id: cerebras + provider_model_id: llama-3.3-70b + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: cerebras + provider_model_id: llama-3.3-70b + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: [] +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/templates/ci-tests/run.yaml b/llama_stack/templates/ci-tests/run.yaml new file mode 100644 index 000000000..6501fb7e5 --- /dev/null +++ b/llama_stack/templates/ci-tests/run.yaml @@ -0,0 +1,241 @@ +version: 2 +image_name: ci-tests +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: fireworks + provider_type: remote::fireworks + config: + url: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: sqlite-vec + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/inference_store.db +models: +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-70B-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-8b + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-8B + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-11b-vision + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-11B-Vision + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama4-scout-instruct-basic + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + model_type: llm +- metadata: + embedding_dimension: 768 + context_length: 8192 + model_id: nomic-ai/nomic-embed-text-v1.5 + provider_id: fireworks + provider_model_id: nomic-ai/nomic-embed-text-v1.5 + model_type: embedding +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: +- shield_id: meta-llama/Llama-Guard-3-8B +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/templates/dell/run-with-safety.yaml b/llama_stack/templates/dell/run-with-safety.yaml new file mode 100644 index 000000000..121087d8c --- /dev/null +++ b/llama_stack/templates/dell/run-with-safety.yaml @@ -0,0 +1,132 @@ +version: 2 +image_name: dell +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: tgi0 + provider_type: remote::tgi + config: + url: ${env.DEH_URL} + - provider_id: tgi1 + provider_type: remote::tgi + config: + url: ${env.DEH_SAFETY_URL} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: chromadb + provider_type: remote::chromadb + config: + url: ${env.CHROMA_URL} + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: tgi0 + model_type: llm +- metadata: {} + model_id: ${env.SAFETY_MODEL} + provider_id: tgi1 + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: +- shield_id: ${env.SAFETY_MODEL} +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: brave-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/templates/dell/run.yaml b/llama_stack/templates/dell/run.yaml new file mode 100644 index 000000000..de661bfda --- /dev/null +++ b/llama_stack/templates/dell/run.yaml @@ -0,0 +1,123 @@ +version: 2 +image_name: dell +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: tgi0 + provider_type: remote::tgi + config: + url: ${env.DEH_URL} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: chromadb + provider_type: remote::chromadb + config: + url: ${env.CHROMA_URL} + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/inference_store.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: tgi0 + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: [] +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: brave-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/templates/fireworks/run-with-safety.yaml b/llama_stack/templates/fireworks/run-with-safety.yaml new file mode 100644 index 000000000..26212e57d --- /dev/null +++ b/llama_stack/templates/fireworks/run-with-safety.yaml @@ -0,0 +1,268 @@ +version: 2 +image_name: fireworks +apis: +- agents +- datasetio +- eval +- files +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: fireworks + provider_type: remote::fireworks + config: + url: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + - provider_id: llama-guard-vision + provider_type: inline::llama-guard + config: {} + - provider_id: code-scanner + provider_type: inline::code-scanner + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/fireworks/files} + metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/files_metadata.db + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: wolfram-alpha + provider_type: remote::wolfram-alpha + config: + api_key: ${env.WOLFRAM_ALPHA_API_KEY:=} + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/inference_store.db +models: +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-70B-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-8b + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-8B + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-11b-vision + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-11B-Vision + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama4-scout-instruct-basic + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + model_type: llm +- metadata: + embedding_dimension: 768 + context_length: 8192 + model_id: nomic-ai/nomic-embed-text-v1.5 + provider_id: fireworks + provider_model_id: nomic-ai/nomic-embed-text-v1.5 + model_type: embedding +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: +- shield_id: meta-llama/Llama-Guard-3-8B + provider_id: llama-guard +- shield_id: meta-llama/Llama-Guard-3-11B-Vision + provider_id: llama-guard-vision +- shield_id: CodeScanner + provider_id: code-scanner +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::wolfram_alpha + provider_id: wolfram-alpha +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/templates/fireworks/run.yaml b/llama_stack/templates/fireworks/run.yaml new file mode 100644 index 000000000..38860cdb9 --- /dev/null +++ b/llama_stack/templates/fireworks/run.yaml @@ -0,0 +1,258 @@ +version: 2 +image_name: fireworks +apis: +- agents +- datasetio +- eval +- files +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: fireworks + provider_type: remote::fireworks + config: + url: https://api.fireworks.ai/inference/v1 + api_key: ${env.FIREWORKS_API_KEY} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/fireworks/files} + metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/files_metadata.db + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: wolfram-alpha + provider_type: remote::wolfram-alpha + config: + api_key: ${env.WOLFRAM_ALPHA_API_KEY:=} + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/fireworks}/inference_store.db +models: +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-8b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-70B-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-70b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p1-405b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-3b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-11b-vision-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p2-90b-vision-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-v3p3-70b-instruct + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-8b + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-8B + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-8b + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama-guard-3-11b-vision + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-11B-Vision + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama-guard-3-11b-vision + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama4-scout-instruct-basic + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-scout-instruct-basic + model_type: llm +- metadata: {} + model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: fireworks + provider_model_id: accounts/fireworks/models/llama4-maverick-instruct-basic + model_type: llm +- metadata: + embedding_dimension: 768 + context_length: 8192 + model_id: nomic-ai/nomic-embed-text-v1.5 + provider_id: fireworks + provider_model_id: nomic-ai/nomic-embed-text-v1.5 + model_type: embedding +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: +- shield_id: meta-llama/Llama-Guard-3-8B +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::wolfram_alpha + provider_id: wolfram-alpha +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/templates/groq/run.yaml b/llama_stack/templates/groq/run.yaml new file mode 100644 index 000000000..ecc533ad9 --- /dev/null +++ b/llama_stack/templates/groq/run.yaml @@ -0,0 +1,207 @@ +version: 2 +image_name: groq +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: groq + provider_type: remote::groq + config: + url: https://api.groq.com + api_key: ${env.GROQ_API_KEY} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/groq}/inference_store.db +models: +- metadata: {} + model_id: groq/llama3-8b-8192 + provider_id: groq + provider_model_id: groq/llama3-8b-8192 + model_type: llm +- metadata: {} + model_id: groq/meta-llama/Llama-3.1-8B-Instruct + provider_id: groq + provider_model_id: groq/llama3-8b-8192 + model_type: llm +- metadata: {} + model_id: groq/llama-3.1-8b-instant + provider_id: groq + provider_model_id: groq/llama-3.1-8b-instant + model_type: llm +- metadata: {} + model_id: groq/llama3-70b-8192 + provider_id: groq + provider_model_id: groq/llama3-70b-8192 + model_type: llm +- metadata: {} + model_id: groq/meta-llama/Llama-3-70B-Instruct + provider_id: groq + provider_model_id: groq/llama3-70b-8192 + model_type: llm +- metadata: {} + model_id: groq/llama-3.3-70b-versatile + provider_id: groq + provider_model_id: groq/llama-3.3-70b-versatile + model_type: llm +- metadata: {} + model_id: groq/meta-llama/Llama-3.3-70B-Instruct + provider_id: groq + provider_model_id: groq/llama-3.3-70b-versatile + model_type: llm +- metadata: {} + model_id: groq/llama-3.2-3b-preview + provider_id: groq + provider_model_id: groq/llama-3.2-3b-preview + model_type: llm +- metadata: {} + model_id: groq/meta-llama/Llama-3.2-3B-Instruct + provider_id: groq + provider_model_id: groq/llama-3.2-3b-preview + model_type: llm +- metadata: {} + model_id: groq/llama-4-scout-17b-16e-instruct + provider_id: groq + provider_model_id: groq/llama-4-scout-17b-16e-instruct + model_type: llm +- metadata: {} + model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: groq + provider_model_id: groq/llama-4-scout-17b-16e-instruct + model_type: llm +- metadata: {} + model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct + provider_id: groq + provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct + model_type: llm +- metadata: {} + model_id: groq/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: groq + provider_model_id: groq/meta-llama/llama-4-scout-17b-16e-instruct + model_type: llm +- metadata: {} + model_id: groq/llama-4-maverick-17b-128e-instruct + provider_id: groq + provider_model_id: groq/llama-4-maverick-17b-128e-instruct + model_type: llm +- metadata: {} + model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: groq + provider_model_id: groq/llama-4-maverick-17b-128e-instruct + model_type: llm +- metadata: {} + model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct + provider_id: groq + provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct + model_type: llm +- metadata: {} + model_id: groq/meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: groq + provider_model_id: groq/meta-llama/llama-4-maverick-17b-128e-instruct + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: [] +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/templates/hf-endpoint/run-with-safety.yaml b/llama_stack/templates/hf-endpoint/run-with-safety.yaml new file mode 100644 index 000000000..550651cf8 --- /dev/null +++ b/llama_stack/templates/hf-endpoint/run-with-safety.yaml @@ -0,0 +1,139 @@ +version: 2 +image_name: hf-endpoint +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: hf-endpoint + provider_type: remote::hf::endpoint + config: + endpoint_name: ${env.INFERENCE_ENDPOINT_NAME} + api_token: ${env.HF_API_TOKEN} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + - provider_id: hf-endpoint-safety + provider_type: remote::hf::endpoint + config: + endpoint_name: ${env.SAFETY_INFERENCE_ENDPOINT_NAME} + api_token: ${env.HF_API_TOKEN} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/inference_store.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: hf-endpoint + model_type: llm +- metadata: {} + model_id: ${env.SAFETY_MODEL} + provider_id: hf-endpoint-safety + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: +- shield_id: ${env.SAFETY_MODEL} +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/templates/hf-endpoint/run.yaml b/llama_stack/templates/hf-endpoint/run.yaml new file mode 100644 index 000000000..77e7c8cfc --- /dev/null +++ b/llama_stack/templates/hf-endpoint/run.yaml @@ -0,0 +1,129 @@ +version: 2 +image_name: hf-endpoint +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: hf-endpoint + provider_type: remote::hf::endpoint + config: + endpoint_name: ${env.INFERENCE_ENDPOINT_NAME} + api_token: ${env.HF_API_TOKEN} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-endpoint}/inference_store.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: hf-endpoint + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: [] +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/templates/hf-serverless/run-with-safety.yaml b/llama_stack/templates/hf-serverless/run-with-safety.yaml new file mode 100644 index 000000000..5344d4347 --- /dev/null +++ b/llama_stack/templates/hf-serverless/run-with-safety.yaml @@ -0,0 +1,139 @@ +version: 2 +image_name: hf-serverless +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: hf-serverless + provider_type: remote::hf::serverless + config: + huggingface_repo: ${env.INFERENCE_MODEL} + api_token: ${env.HF_API_TOKEN} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + - provider_id: hf-serverless-safety + provider_type: remote::hf::serverless + config: + huggingface_repo: ${env.SAFETY_MODEL} + api_token: ${env.HF_API_TOKEN} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/inference_store.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: hf-serverless + model_type: llm +- metadata: {} + model_id: ${env.SAFETY_MODEL} + provider_id: hf-serverless-safety + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: +- shield_id: ${env.SAFETY_MODEL} +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/templates/hf-serverless/run.yaml b/llama_stack/templates/hf-serverless/run.yaml new file mode 100644 index 000000000..de7ba3a10 --- /dev/null +++ b/llama_stack/templates/hf-serverless/run.yaml @@ -0,0 +1,129 @@ +version: 2 +image_name: hf-serverless +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: hf-serverless + provider_type: remote::hf::serverless + config: + huggingface_repo: ${env.INFERENCE_MODEL} + api_token: ${env.HF_API_TOKEN} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/hf-serverless}/inference_store.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: hf-serverless + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: [] +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/templates/llama_api/run.yaml b/llama_stack/templates/llama_api/run.yaml new file mode 100644 index 000000000..83cf5d5b2 --- /dev/null +++ b/llama_stack/templates/llama_api/run.yaml @@ -0,0 +1,166 @@ +version: 2 +image_name: llama_api +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: llama-openai-compat + provider_type: remote::llama-openai-compat + config: + openai_compat_api_base: https://api.llama.com/compat/v1/ + api_key: ${env.LLAMA_API_KEY:=} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: sqlite-vec + provider_type: inline::sqlite-vec + config: + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/sqlite_vec.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} + provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:=} + user: ${env.PGVECTOR_USER:=} + password: ${env.PGVECTOR_PASSWORD:=} + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/llama_api}/inference_store.db +models: +- metadata: {} + model_id: Llama-3.3-70B-Instruct + provider_id: llama-openai-compat + provider_model_id: Llama-3.3-70B-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: llama-openai-compat + provider_model_id: Llama-3.3-70B-Instruct + model_type: llm +- metadata: {} + model_id: Llama-4-Scout-17B-16E-Instruct-FP8 + provider_id: llama-openai-compat + provider_model_id: Llama-4-Scout-17B-16E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: llama-openai-compat + provider_model_id: Llama-4-Scout-17B-16E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: Llama-4-Maverick-17B-128E-Instruct-FP8 + provider_id: llama-openai-compat + provider_model_id: Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: llama-openai-compat + provider_model_id: Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: +- shield_id: meta-llama/Llama-Guard-3-8B +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml index 49657a680..582974532 100644 --- a/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml +++ b/llama_stack/templates/meta-reference-gpu/run-with-safety.yaml @@ -107,6 +107,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/meta-reference-gpu/run.yaml b/llama_stack/templates/meta-reference-gpu/run.yaml index 2923b5faf..88c58b493 100644 --- a/llama_stack/templates/meta-reference-gpu/run.yaml +++ b/llama_stack/templates/meta-reference-gpu/run.yaml @@ -97,6 +97,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/ollama/run-with-safety.yaml b/llama_stack/templates/ollama/run-with-safety.yaml new file mode 100644 index 000000000..2838d6069 --- /dev/null +++ b/llama_stack/templates/ollama/run-with-safety.yaml @@ -0,0 +1,160 @@ +version: 2 +image_name: ollama +apis: +- agents +- datasetio +- eval +- files +- inference +- post_training +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: ollama + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:=http://localhost:11434} + raise_on_connect_error: true + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + - provider_id: code-scanner + provider_type: inline::code-scanner + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ollama/files} + metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/files_metadata.db + post_training: + - provider_id: huggingface + provider_type: inline::huggingface + config: + checkpoint_format: huggingface + distributed_backend: null + device: cpu + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + - provider_id: wolfram-alpha + provider_type: remote::wolfram-alpha + config: + api_key: ${env.WOLFRAM_ALPHA_API_KEY:=} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/inference_store.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: ollama + model_type: llm +- metadata: {} + model_id: ${env.SAFETY_MODEL} + provider_id: ollama + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: ollama + provider_model_id: all-minilm:latest + model_type: embedding +shields: +- shield_id: ${env.SAFETY_MODEL} + provider_id: llama-guard +- shield_id: CodeScanner + provider_id: code-scanner +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +- toolgroup_id: builtin::wolfram_alpha + provider_id: wolfram-alpha +server: + port: 8321 diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml new file mode 100644 index 000000000..129bf4912 --- /dev/null +++ b/llama_stack/templates/ollama/run.yaml @@ -0,0 +1,150 @@ +version: 2 +image_name: ollama +apis: +- agents +- datasetio +- eval +- files +- inference +- post_training +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: ollama + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:=http://localhost:11434} + raise_on_connect_error: true + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + files: + - provider_id: meta-reference-files + provider_type: inline::localfs + config: + storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ollama/files} + metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/files_metadata.db + post_training: + - provider_id: huggingface + provider_type: inline::huggingface + config: + checkpoint_format: huggingface + distributed_backend: null + device: cpu + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + - provider_id: wolfram-alpha + provider_type: remote::wolfram-alpha + config: + api_key: ${env.WOLFRAM_ALPHA_API_KEY:=} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/inference_store.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: ollama + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: ollama + provider_model_id: all-minilm:latest + model_type: embedding +shields: [] +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +- toolgroup_id: builtin::wolfram_alpha + provider_id: wolfram-alpha +server: + port: 8321 diff --git a/llama_stack/templates/open-benchmark/run.yaml b/llama_stack/templates/open-benchmark/run.yaml index 76c029864..666b575dc 100644 --- a/llama_stack/templates/open-benchmark/run.yaml +++ b/llama_stack/templates/open-benchmark/run.yaml @@ -116,6 +116,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/passthrough/run-with-safety.yaml b/llama_stack/templates/passthrough/run-with-safety.yaml new file mode 100644 index 000000000..e33009b6d --- /dev/null +++ b/llama_stack/templates/passthrough/run-with-safety.yaml @@ -0,0 +1,152 @@ +version: 2 +image_name: passthrough +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: passthrough + provider_type: remote::passthrough + config: + url: ${env.PASSTHROUGH_URL} + api_key: ${env.PASSTHROUGH_API_KEY} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + - provider_id: llama-guard-vision + provider_type: inline::llama-guard + config: {} + - provider_id: code-scanner + provider_type: inline::code-scanner + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: wolfram-alpha + provider_type: remote::wolfram-alpha + config: + api_key: ${env.WOLFRAM_ALPHA_API_KEY:=} + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/inference_store.db +models: +- metadata: {} + model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: passthrough + provider_model_id: llama3.1-8b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: passthrough + provider_model_id: llama3.2-11b-vision-instruct + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: +- shield_id: meta-llama/Llama-Guard-3-8B + provider_id: llama-guard +- shield_id: meta-llama/Llama-Guard-3-11B-Vision + provider_id: llama-guard-vision +- shield_id: CodeScanner + provider_id: code-scanner +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::wolfram_alpha + provider_id: wolfram-alpha +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/templates/passthrough/run.yaml b/llama_stack/templates/passthrough/run.yaml new file mode 100644 index 000000000..3578b3ad8 --- /dev/null +++ b/llama_stack/templates/passthrough/run.yaml @@ -0,0 +1,142 @@ +version: 2 +image_name: passthrough +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: passthrough + provider_type: remote::passthrough + config: + url: ${env.PASSTHROUGH_URL} + api_key: ${env.PASSTHROUGH_API_KEY} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: wolfram-alpha + provider_type: remote::wolfram-alpha + config: + api_key: ${env.WOLFRAM_ALPHA_API_KEY:=} + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/passthrough}/inference_store.db +models: +- metadata: {} + model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: passthrough + provider_model_id: llama3.1-8b-instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: passthrough + provider_model_id: llama3.2-11b-vision-instruct + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: +- shield_id: meta-llama/Llama-Guard-3-8B +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::wolfram_alpha + provider_id: wolfram-alpha +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/templates/postgres-demo/run.yaml b/llama_stack/templates/postgres-demo/run.yaml index 2b6b1a64f..ce3ea23bf 100644 --- a/llama_stack/templates/postgres-demo/run.yaml +++ b/llama_stack/templates/postgres-demo/run.yaml @@ -65,6 +65,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/remote-vllm/run-with-safety.yaml b/llama_stack/templates/remote-vllm/run-with-safety.yaml new file mode 100644 index 000000000..0faee221d --- /dev/null +++ b/llama_stack/templates/remote-vllm/run-with-safety.yaml @@ -0,0 +1,149 @@ +version: 2 +image_name: remote-vllm +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: vllm-inference + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: vllm-safety + provider_type: remote::vllm + config: + url: ${env.SAFETY_VLLM_URL} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/responses_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/trace_store.db + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + - provider_id: wolfram-alpha + provider_type: remote::wolfram-alpha + config: + api_key: ${env.WOLFRAM_ALPHA_API_KEY:=} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/inference_store.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: vllm-inference + model_type: llm +- metadata: {} + model_id: ${env.SAFETY_MODEL} + provider_id: vllm-safety + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: +- shield_id: ${env.SAFETY_MODEL} +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +- toolgroup_id: builtin::wolfram_alpha + provider_id: wolfram-alpha +server: + port: 8321 diff --git a/llama_stack/templates/remote-vllm/run.yaml b/llama_stack/templates/remote-vllm/run.yaml new file mode 100644 index 000000000..80da1f534 --- /dev/null +++ b/llama_stack/templates/remote-vllm/run.yaml @@ -0,0 +1,137 @@ +version: 2 +image_name: remote-vllm +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: vllm-inference + provider_type: remote::vllm + config: + url: ${env.VLLM_URL:=http://localhost:8000/v1} + max_tokens: ${env.VLLM_MAX_TOKENS:=4096} + api_token: ${env.VLLM_API_TOKEN:=fake} + tls_verify: ${env.VLLM_TLS_VERIFY:=true} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/responses_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/trace_store.db + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + - provider_id: wolfram-alpha + provider_type: remote::wolfram-alpha + config: + api_key: ${env.WOLFRAM_ALPHA_API_KEY:=} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/remote-vllm}/inference_store.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: vllm-inference + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: [] +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +- toolgroup_id: builtin::wolfram_alpha + provider_id: wolfram-alpha +server: + port: 8321 diff --git a/llama_stack/templates/sambanova/run.yaml b/llama_stack/templates/sambanova/run.yaml new file mode 100644 index 000000000..39e6290e1 --- /dev/null +++ b/llama_stack/templates/sambanova/run.yaml @@ -0,0 +1,214 @@ +version: 2 +image_name: sambanova +apis: +- agents +- inference +- safety +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: sambanova + provider_type: remote::sambanova + config: + url: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/faiss_store.db + - provider_id: ${env.ENABLE_CHROMADB:+chromadb} + provider_type: remote::chromadb + config: + url: ${env.CHROMADB_URL:=} + - provider_id: ${env.ENABLE_PGVECTOR:+pgvector} + provider_type: remote::pgvector + config: + host: ${env.PGVECTOR_HOST:=localhost} + port: ${env.PGVECTOR_PORT:=5432} + db: ${env.PGVECTOR_DB:=} + user: ${env.PGVECTOR_USER:=} + password: ${env.PGVECTOR_PASSWORD:=} + safety: + - provider_id: sambanova + provider_type: remote::sambanova + config: + url: https://api.sambanova.ai/v1 + api_key: ${env.SAMBANOVA_API_KEY} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/trace_store.db + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + - provider_id: wolfram-alpha + provider_type: remote::wolfram-alpha + config: + api_key: ${env.WOLFRAM_ALPHA_API_KEY:=} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/sambanova}/inference_store.db +models: +- metadata: {} + model_id: sambanova/Meta-Llama-3.1-8B-Instruct + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.1-8B-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Meta-Llama-3.1-405B-Instruct + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.1-405B-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Meta-Llama-3.2-1B-Instruct + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-1B-Instruct + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.2-1B-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Meta-Llama-3.2-3B-Instruct + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.2-3B-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Meta-Llama-3.3-70B-Instruct + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-3.3-70B-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Llama-3.2-11B-Vision-Instruct + provider_id: sambanova + provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: sambanova + provider_model_id: sambanova/Llama-3.2-11B-Vision-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Llama-3.2-90B-Vision-Instruct + provider_id: sambanova + provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: sambanova + provider_model_id: sambanova/Llama-3.2-90B-Vision-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Llama-4-Scout-17B-16E-Instruct + provider_id: sambanova + provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: sambanova + provider_model_id: sambanova/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct + provider_id: sambanova + provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: sambanova + provider_model_id: sambanova/Llama-4-Maverick-17B-128E-Instruct + model_type: llm +- metadata: {} + model_id: sambanova/Meta-Llama-Guard-3-8B + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-Guard-3-8B + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-8B + provider_id: sambanova + provider_model_id: sambanova/Meta-Llama-Guard-3-8B + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: +- shield_id: meta-llama/Llama-Guard-3-8B + provider_shield_id: sambanova/Meta-Llama-Guard-3-8B +- shield_id: sambanova/Meta-Llama-Guard-3-8B + provider_shield_id: sambanova/Meta-Llama-Guard-3-8B +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +- toolgroup_id: builtin::wolfram_alpha + provider_id: wolfram-alpha +server: + port: 8321 diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml index b3dfe32d5..88459a2c4 100644 --- a/llama_stack/templates/starter/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -243,6 +243,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/tgi/run-with-safety.yaml b/llama_stack/templates/tgi/run-with-safety.yaml new file mode 100644 index 000000000..400b654e7 --- /dev/null +++ b/llama_stack/templates/tgi/run-with-safety.yaml @@ -0,0 +1,129 @@ +version: 2 +image_name: tgi +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: tgi-inference + provider_type: remote::tgi + config: + url: ${env.TGI_URL} + - provider_id: tgi-safety + provider_type: remote::tgi + config: + url: ${env.TGI_SAFETY_URL} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/inference_store.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: tgi-inference + model_type: llm +- metadata: {} + model_id: ${env.SAFETY_MODEL} + provider_id: tgi-safety + model_type: llm +shields: +- shield_id: ${env.SAFETY_MODEL} +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/templates/tgi/run.yaml b/llama_stack/templates/tgi/run.yaml new file mode 100644 index 000000000..b4636ddfb --- /dev/null +++ b/llama_stack/templates/tgi/run.yaml @@ -0,0 +1,128 @@ +version: 2 +image_name: tgi +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: tgi-inference + provider_type: remote::tgi + config: + url: ${env.TGI_URL} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/tgi}/inference_store.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: tgi-inference + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: [] +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +server: + port: 8321 diff --git a/llama_stack/templates/together/run-with-safety.yaml b/llama_stack/templates/together/run-with-safety.yaml new file mode 100644 index 000000000..f6df14477 --- /dev/null +++ b/llama_stack/templates/together/run-with-safety.yaml @@ -0,0 +1,276 @@ +version: 2 +image_name: together +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: together + provider_type: remote::together + config: + url: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY:=} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + - provider_id: llama-guard-vision + provider_type: inline::llama-guard + config: {} + - provider_id: code-scanner + provider_type: inline::code-scanner + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + - provider_id: wolfram-alpha + provider_type: remote::wolfram-alpha + config: + api_key: ${env.WOLFRAM_ALPHA_API_KEY:=} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/inference_store.db +models: +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-70B-Instruct + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-Guard-3-8B + provider_id: together + provider_model_id: meta-llama/Meta-Llama-Guard-3-8B + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-8B + provider_id: together + provider_model_id: meta-llama/Meta-Llama-Guard-3-8B + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-11B-Vision + provider_id: together + provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + model_type: llm +- metadata: + embedding_dimension: 768 + context_length: 8192 + model_id: togethercomputer/m2-bert-80M-8k-retrieval + provider_id: together + provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval + model_type: embedding +- metadata: + embedding_dimension: 768 + context_length: 32768 + model_id: togethercomputer/m2-bert-80M-32k-retrieval + provider_id: together + provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval + model_type: embedding +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + provider_id: together + provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + provider_id: together + provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: +- shield_id: meta-llama/Llama-Guard-3-8B + provider_id: llama-guard +- shield_id: meta-llama/Llama-Guard-3-11B-Vision + provider_id: llama-guard-vision +- shield_id: CodeScanner + provider_id: code-scanner +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +- toolgroup_id: builtin::wolfram_alpha + provider_id: wolfram-alpha +server: + port: 8321 diff --git a/llama_stack/templates/together/run.yaml b/llama_stack/templates/together/run.yaml new file mode 100644 index 000000000..ef64cf8ee --- /dev/null +++ b/llama_stack/templates/together/run.yaml @@ -0,0 +1,266 @@ +version: 2 +image_name: together +apis: +- agents +- datasetio +- eval +- inference +- safety +- scoring +- telemetry +- tool_runtime +- vector_io +providers: + inference: + - provider_id: together + provider_type: remote::together + config: + url: https://api.together.xyz/v1 + api_key: ${env.TOGETHER_API_KEY:=} + - provider_id: sentence-transformers + provider_type: inline::sentence-transformers + config: {} + vector_io: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: + excluded_categories: [] + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/agents_store.db + responses_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/responses_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + service_name: "${env.OTEL_SERVICE_NAME:=\u200B}" + sinks: ${env.TELEMETRY_SINKS:=console,sqlite} + sqlite_db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/trace_store.db + eval: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/meta_reference_eval.db + datasetio: + - provider_id: huggingface + provider_type: remote::huggingface + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/huggingface_datasetio.db + - provider_id: localfs + provider_type: inline::localfs + config: + kvstore: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/localfs_datasetio.db + scoring: + - provider_id: basic + provider_type: inline::basic + config: {} + - provider_id: llm-as-judge + provider_type: inline::llm-as-judge + config: {} + - provider_id: braintrust + provider_type: inline::braintrust + config: + openai_api_key: ${env.OPENAI_API_KEY:=} + tool_runtime: + - provider_id: brave-search + provider_type: remote::brave-search + config: + api_key: ${env.BRAVE_SEARCH_API_KEY:=} + max_results: 3 + - provider_id: tavily-search + provider_type: remote::tavily-search + config: + api_key: ${env.TAVILY_SEARCH_API_KEY:=} + max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 + - provider_id: rag-runtime + provider_type: inline::rag-runtime + config: {} + - provider_id: model-context-protocol + provider_type: remote::model-context-protocol + config: {} + - provider_id: wolfram-alpha + provider_type: remote::wolfram-alpha + config: + api_key: ${env.WOLFRAM_ALPHA_API_KEY:=} +metadata_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/registry.db +inference_store: + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/together}/inference_store.db +models: +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-8B-Instruct + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-70B-Instruct + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.1-405B-Instruct-FP8 + provider_id: together + provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-3B-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-3.2-3B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-11B-Vision-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.2-90B-Vision-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-3.3-70B-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-3.3-70B-Instruct-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Meta-Llama-Guard-3-8B + provider_id: together + provider_model_id: meta-llama/Meta-Llama-Guard-3-8B + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-8B + provider_id: together + provider_model_id: meta-llama/Meta-Llama-Guard-3-8B + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + provider_id: together + provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-Guard-3-11B-Vision + provider_id: together + provider_model_id: meta-llama/Llama-Guard-3-11B-Vision-Turbo + model_type: llm +- metadata: + embedding_dimension: 768 + context_length: 8192 + model_id: togethercomputer/m2-bert-80M-8k-retrieval + provider_id: together + provider_model_id: togethercomputer/m2-bert-80M-8k-retrieval + model_type: embedding +- metadata: + embedding_dimension: 768 + context_length: 32768 + model_id: togethercomputer/m2-bert-80M-32k-retrieval + provider_id: together + provider_model_id: togethercomputer/m2-bert-80M-32k-retrieval + model_type: embedding +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-4-Scout-17B-16E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Scout-17B-16E-Instruct + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + provider_id: together + provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct + provider_id: together + provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: {} + model_id: together/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + provider_id: together + provider_model_id: meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8 + model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: sentence-transformers + model_type: embedding +shields: +- shield_id: meta-llama/Llama-Guard-3-8B +vector_dbs: [] +datasets: [] +scoring_fns: [] +benchmarks: [] +tool_groups: +- toolgroup_id: builtin::websearch + provider_id: tavily-search +- toolgroup_id: builtin::rag + provider_id: rag-runtime +- toolgroup_id: builtin::wolfram_alpha + provider_id: wolfram-alpha +server: + port: 8321 diff --git a/llama_stack/templates/vllm-gpu/run.yaml b/llama_stack/templates/vllm-gpu/run.yaml index 4241569a4..f8ff82c97 100644 --- a/llama_stack/templates/vllm-gpu/run.yaml +++ b/llama_stack/templates/vllm-gpu/run.yaml @@ -96,6 +96,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} diff --git a/llama_stack/templates/watsonx/run.yaml b/llama_stack/templates/watsonx/run.yaml index afbbdb917..9b0f5416d 100644 --- a/llama_stack/templates/watsonx/run.yaml +++ b/llama_stack/templates/watsonx/run.yaml @@ -93,6 +93,8 @@ providers: config: api_key: ${env.TAVILY_SEARCH_API_KEY:=} max_results: 3 + timeout: 30.0 + connect_timeout: 10.0 - provider_id: rag-runtime provider_type: inline::rag-runtime config: {} From f81b8c53c2a2e9be4e675f548e86c4b12ecedd3f Mon Sep 17 00:00:00 2001 From: anigasan Date: Mon, 7 Jul 2025 12:39:33 -0700 Subject: [PATCH 11/17] made changes to tavily_search, simplified config logic --- .../remote/tool_runtime/tavily_search/tavily_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index d89c9b101..5158f4d40 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -70,9 +70,9 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: api_key = self._get_api_key() - httpx.Timeout(timeout=self.config.timeout, connect=self.config.connect_timeout) + timeout = httpx.Timeout(timeout=self.config.timeout) - async with httpx.AsyncClient() as client: + async with httpx.AsyncClient(timeout=timeout) as client: response = await client.post( "https://api.tavily.com/search", json={"api_key": api_key, "query": kwargs["query"]}, From 15d27a251f7de354f255acc2527ccab3f4164a61 Mon Sep 17 00:00:00 2001 From: anigasan Date: Mon, 7 Jul 2025 12:44:45 -0700 Subject: [PATCH 12/17] made changes to configuration --- .../providers/remote/tool_runtime/tavily_search/config.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py index ec409ab55..b4ddd9cb4 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py @@ -22,10 +22,6 @@ class TavilySearchToolConfig(BaseModel): default=30.0, description="HTTP request timeout for the API", ) - connect_timeout: float = Field( - default=10.0, - description="HTTP connection timeout in seconds for the API", - ) @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: @@ -33,5 +29,4 @@ class TavilySearchToolConfig(BaseModel): "api_key": "${env.TAVILY_SEARCH_API_KEY:=}", "max_results": 3, "timeout": 30.0, - "connect_timeout": 10.0, } From 349e80d06ad08faea293ec3de28a504cc04cf063 Mon Sep 17 00:00:00 2001 From: anigasan Date: Thu, 3 Jul 2025 12:21:28 -0700 Subject: [PATCH 13/17] Changed config and tavily_search for tavily API --- .../providers/remote/tool_runtime/tavily_search/config.py | 2 +- .../remote/tool_runtime/tavily_search/tavily_search.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py index b4ddd9cb4..e3046343e 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py @@ -26,7 +26,7 @@ class TavilySearchToolConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "api_key": "${env.TAVILY_SEARCH_API_KEY:=}", + "api_key": "${env.TAVILY_SEARCH_API_KEY:+}", "max_results": 3, "timeout": 30.0, } diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index 5158f4d40..e8ca36cf4 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -69,9 +69,10 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: api_key = self._get_api_key() - - timeout = httpx.Timeout(timeout=self.config.timeout) - + + # Configure timeout for external API calls using config values + timeout = httpx.Timeout(timeout=self.config.timeout, connect=self.config.connect_timeout) + async with httpx.AsyncClient(timeout=timeout) as client: response = await client.post( "https://api.tavily.com/search", From f288f92f888fd2a520dcd40e9e7451096a0994eb Mon Sep 17 00:00:00 2001 From: anigasan Date: Fri, 4 Jul 2025 11:48:47 -0700 Subject: [PATCH 14/17] config file changes --- .../providers/remote/tool_runtime/tavily_search/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py index e3046343e..b4ddd9cb4 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/config.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/config.py @@ -26,7 +26,7 @@ class TavilySearchToolConfig(BaseModel): @classmethod def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]: return { - "api_key": "${env.TAVILY_SEARCH_API_KEY:+}", + "api_key": "${env.TAVILY_SEARCH_API_KEY:=}", "max_results": 3, "timeout": 30.0, } From 8c4f489c5d467293bcb29bcfe517287134e1f684 Mon Sep 17 00:00:00 2001 From: anigasan Date: Sun, 6 Jul 2025 12:52:47 -0700 Subject: [PATCH 15/17] commits --- .../remote/tool_runtime/tavily_search/tavily_search.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index e8ca36cf4..cdaf78d4a 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -69,11 +69,10 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: api_key = self._get_api_key() - - # Configure timeout for external API calls using config values - timeout = httpx.Timeout(timeout=self.config.timeout, connect=self.config.connect_timeout) - - async with httpx.AsyncClient(timeout=timeout) as client: + + timeout = https.Timeout(timeout=self.config.timeout, connect=self.config.connect_timeout) + + async with httpx.AsyncClient() as client: response = await client.post( "https://api.tavily.com/search", json={"api_key": api_key, "query": kwargs["query"]}, From b6629240570c4d901673ec9ebacaf40c8dc36688 Mon Sep 17 00:00:00 2001 From: anigasan Date: Sun, 6 Jul 2025 18:44:27 -0700 Subject: [PATCH 16/17] more commits --- .../remote/tool_runtime/tavily_search/tavily_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index cdaf78d4a..d89c9b101 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -70,7 +70,7 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: api_key = self._get_api_key() - timeout = https.Timeout(timeout=self.config.timeout, connect=self.config.connect_timeout) + httpx.Timeout(timeout=self.config.timeout, connect=self.config.connect_timeout) async with httpx.AsyncClient() as client: response = await client.post( From ed6003cf459d56c438311c45e78184989d4fa3d0 Mon Sep 17 00:00:00 2001 From: anigasan Date: Mon, 7 Jul 2025 12:39:33 -0700 Subject: [PATCH 17/17] made changes to tavily_search, simplified config logic --- .../remote/tool_runtime/tavily_search/tavily_search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py index d89c9b101..5158f4d40 100644 --- a/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py +++ b/llama_stack/providers/remote/tool_runtime/tavily_search/tavily_search.py @@ -70,9 +70,9 @@ class TavilySearchToolRuntimeImpl(ToolGroupsProtocolPrivate, ToolRuntime, NeedsR async def invoke_tool(self, tool_name: str, kwargs: dict[str, Any]) -> ToolInvocationResult: api_key = self._get_api_key() - httpx.Timeout(timeout=self.config.timeout, connect=self.config.connect_timeout) + timeout = httpx.Timeout(timeout=self.config.timeout) - async with httpx.AsyncClient() as client: + async with httpx.AsyncClient(timeout=timeout) as client: response = await client.post( "https://api.tavily.com/search", json={"api_key": api_key, "query": kwargs["query"]},