From 6824d23dc903ae16cfb4e4af299150f655a6133c Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Thu, 27 Feb 2025 22:35:52 -0500 Subject: [PATCH 01/22] test: Only run embedding tests for remote::nvidia (#1317) This fixes release build failure https://github.com/meta-llama/llama-stack-ops/actions/runs/13580302250/job/37964972403: ``` =================================== FAILURES =================================== ______ test_embedding_truncation_error[txt=8B:emb=MiniLM-long-text-None] _______ llama-stack/tests/client-sdk/inference/test_embedding.py:166: in test_embedding_truncation_error with pytest.raises(BadRequestError) as excinfo: E Failed: DID NOT RAISE ______ test_embedding_truncation_error[txt=8B:emb=MiniLM-long-text-none] _______ llama-stack/tests/client-sdk/inference/test_embedding.py:166: in test_embedding_truncation_error with pytest.raises(BadRequestError) as excinfo: E Failed: DID NOT RAISE _______ test_embedding_truncation_error[txt=8B:emb=MiniLM-long-str-None] _______ llama-stack/tests/client-sdk/inference/test_embedding.py:166: in test_embedding_truncation_error with pytest.raises(BadRequestError) as excinfo: E Failed: DID NOT RAISE _______ test_embedding_truncation_error[txt=8B:emb=MiniLM-long-str-none] _______ llama-stack/tests/client-sdk/inference/test_embedding.py:166: in test_embedding_truncation_error with pytest.raises(BadRequestError) as excinfo: E Failed: DID NOT RAISE _________ test_embedding_text_truncation_error[txt=8B:emb=MiniLM-NONE] _________ llama-stack/tests/client-sdk/inference/test_embedding.py:223: in test_embedding_text_truncation_error with pytest.raises(BadRequestError) as excinfo: E Failed: DID NOT RAISE _________ test_embedding_text_truncation_error[txt=8B:emb=MiniLM-END] __________ llama-stack/tests/client-sdk/inference/test_embedding.py:223: in test_embedding_text_truncation_error with pytest.raises(BadRequestError) as excinfo: E Failed: DID NOT RAISE ________ test_embedding_text_truncation_error[txt=8B:emb=MiniLM-START] _________ llama-stack/tests/client-sdk/inference/test_embedding.py:223: in test_embedding_text_truncation_error with pytest.raises(BadRequestError) as excinfo: E Failed: DID NOT RAISE _________ test_embedding_text_truncation_error[txt=8B:emb=MiniLM-left] _________ llama-stack/tests/client-sdk/inference/test_embedding.py:223: in test_embedding_text_truncation_error with pytest.raises(BadRequestError) as excinfo: E Failed: DID NOT RAISE ________ test_embedding_text_truncation_error[txt=8B:emb=MiniLM-right] _________ llama-stack/tests/client-sdk/inference/test_embedding.py:223: in test_embedding_text_truncation_error with pytest.raises(BadRequestError) as excinfo: E Failed: DID NOT RAISE =========================== short test summary info ============================ FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_truncation_error[txt=8B:emb=MiniLM-long-text-None] - Failed: DID NOT RAISE FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_truncation_error[txt=8B:emb=MiniLM-long-text-none] - Failed: DID NOT RAISE FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_truncation_error[txt=8B:emb=MiniLM-long-str-None] - Failed: DID NOT RAISE FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_truncation_error[txt=8B:emb=MiniLM-long-str-none] - Failed: DID NOT RAISE FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_text_truncation_error[txt=8B:emb=MiniLM-NONE] - Failed: DID NOT RAISE FAILED 
llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_text_truncation_error[txt=8B:emb=MiniLM-END] - Failed: DID NOT RAISE FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_text_truncation_error[txt=8B:emb=MiniLM-START] - Failed: DID NOT RAISE FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_text_truncation_error[txt=8B:emb=MiniLM-left] - Failed: DID NOT RAISE FAILED llama-stack/tests/client-sdk/inference/test_embedding.py::test_embedding_text_truncation_error[txt=8B:emb=MiniLM-right] - Failed: DID NOT RAISE = 9 failed, 48 passed, 2 skipped, 3 deselected, 3 xfailed, 1 xpassed, 121 warnings in 90.16s (0:01:30) = Error: Process completed with exit code 1. ``` Signed-off-by: Yuan Tang --- tests/client-sdk/inference/test_embedding.py | 39 ++++++++++++++++---- 1 file changed, 31 insertions(+), 8 deletions(-) diff --git a/tests/client-sdk/inference/test_embedding.py b/tests/client-sdk/inference/test_embedding.py index 46a901d62..c46a6517f 100644 --- a/tests/client-sdk/inference/test_embedding.py +++ b/tests/client-sdk/inference/test_embedding.py @@ -75,6 +75,7 @@ DUMMY_IMAGE_URL = ImageContentItem( image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image" ) DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image") +SUPPORTED_PROVIDERS = {"remote::nvidia"} @pytest.mark.parametrize( @@ -88,7 +89,9 @@ DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64st "list[text]", ], ) -def test_embedding_text(llama_stack_client, embedding_model_id, contents): +def test_embedding_text(llama_stack_client, embedding_model_id, contents, inference_provider_type): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents) assert isinstance(response, EmbeddingsResponse) assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents) @@ -108,7 +111,9 @@ def test_embedding_text(llama_stack_client, embedding_model_id, contents): ], ) @pytest.mark.xfail(reason="Media is not supported") -def test_embedding_image(llama_stack_client, embedding_model_id, contents): +def test_embedding_image(llama_stack_client, embedding_model_id, contents, inference_provider_type): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents) assert isinstance(response, EmbeddingsResponse) assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents) @@ -134,7 +139,11 @@ def test_embedding_image(llama_stack_client, embedding_model_id, contents): "short", ], ) -def test_embedding_truncation(llama_stack_client, embedding_model_id, text_truncation, contents): +def test_embedding_truncation( + llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type +): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") response = llama_stack_client.inference.embeddings( model_id=embedding_model_id, contents=contents, text_truncation=text_truncation ) @@ -162,7 +171,11 @@ def 
test_embedding_truncation(llama_stack_client, embedding_model_id, text_trunc "long-str", ], ) -def test_embedding_truncation_error(llama_stack_client, embedding_model_id, text_truncation, contents): +def test_embedding_truncation_error( + llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type +): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") with pytest.raises(BadRequestError) as excinfo: llama_stack_client.inference.embeddings( model_id=embedding_model_id, contents=[DUMMY_LONG_TEXT], text_truncation=text_truncation @@ -170,7 +183,9 @@ def test_embedding_truncation_error(llama_stack_client, embedding_model_id, text @pytest.mark.xfail(reason="Only valid for model supporting dimension reduction") -def test_embedding_output_dimension(llama_stack_client, embedding_model_id): +def test_embedding_output_dimension(llama_stack_client, embedding_model_id, inference_provider_type): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") base_response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=[DUMMY_STRING]) test_response = llama_stack_client.inference.embeddings( model_id=embedding_model_id, contents=[DUMMY_STRING], output_dimension=32 @@ -180,7 +195,9 @@ def test_embedding_output_dimension(llama_stack_client, embedding_model_id): @pytest.mark.xfail(reason="Only valid for model supporting task type") -def test_embedding_task_type(llama_stack_client, embedding_model_id): +def test_embedding_task_type(llama_stack_client, embedding_model_id, inference_provider_type): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") query_embedding = llama_stack_client.inference.embeddings( model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="query" ) @@ -199,7 +216,9 @@ def test_embedding_task_type(llama_stack_client, embedding_model_id): "start", ], ) -def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation): +def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation, inference_provider_type): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") response = llama_stack_client.inference.embeddings( model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation ) @@ -219,7 +238,11 @@ def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_ "right", ], ) -def test_embedding_text_truncation_error(llama_stack_client, embedding_model_id, text_truncation): +def test_embedding_text_truncation_error( + llama_stack_client, embedding_model_id, text_truncation, inference_provider_type +): + if inference_provider_type not in SUPPORTED_PROVIDERS: + pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") with pytest.raises(BadRequestError) as excinfo: llama_stack_client.inference.embeddings( model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation From 7780fc92d593ca2fe783637c665403fdad84de19 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Thu, 27 Feb 2025 20:13:00 -0800 Subject: [PATCH 02/22] fix: update getting_started notebook to pass nbeval (#1318) # What does this PR do? 
- See https://github.com/meta-llama/llama-stack-ops/actions/runs/13580825399/job/37966677766 - Together's structured decoding API is flaky, add a skip to the cell - Enable cell 21 so that cells 21-23 pass [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan image [//]: # (## Documentation) --- docs/getting_started.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb index 8ae6fed24..21436327e 100644 --- a/docs/getting_started.ipynb +++ b/docs/getting_started.ipynb @@ -1145,6 +1145,7 @@ } ], "source": [ + "# NBVAL_SKIP\n", "from pydantic import BaseModel\n", "\n", "\n", @@ -2885,7 +2886,6 @@ } ], "source": [ - "# NBVAL_SKIP\n", "from llama_stack_client.lib.agents.agent import Agent\n", "from llama_stack_client.lib.agents.event_logger import EventLogger\n", "from llama_stack_client.types.agent_create_params import AgentConfig\n", @@ -4326,7 +4326,7 @@ "provenance": [] }, "kernelspec": { - "display_name": "toolchain", + "display_name": "master", "language": "python", "name": "python3" }, From 999195fe5b6416c092cbf8c5dabc2d221dad33f1 Mon Sep 17 00:00:00 2001 From: Hardik Shah Date: Thu, 27 Feb 2025 20:53:47 -0800 Subject: [PATCH 03/22] fix: [Litellm] Do not swallow first token (#1316) `ChatCompletionResponseEventType: start` is ignored and not yielded in the agent_instance, as we expect it to not have any content. However, litellm sends the first event as `ChatCompletionResponseEventType: start` with content (which was the first token that we were skipping). ``` LLAMA_STACK_CONFIG=dev pytest -s -v tests/client-sdk/agents/test_agents.py --inference-model "openai/gpt-4o-mini" -k test_agent_simple ``` This was failing before (since the word "hello" was not in the final response). --- .../utils/inference/openai_compat.py | 25 +++++++++---------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index eaf5ad2e1..d0fdf6385 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -6,7 +6,7 @@ import json import logging import warnings -from typing import AsyncGenerator, Dict, Generator, Iterable, List, Optional, Union +from typing import AsyncGenerator, Dict, Iterable, List, Optional, Union from openai import AsyncStream from openai.types.chat import ( @@ -841,14 +841,13 @@ async def convert_openai_chat_completion_stream( Convert a stream of OpenAI chat completion chunks into a stream of ChatCompletionResponseStreamChunk. """ - - # generate a stream of ChatCompletionResponseEventType: start -> progress -> progress -> ...
- def _event_type_generator() -> Generator[ChatCompletionResponseEventType, None, None]: - yield ChatCompletionResponseEventType.start - while True: - yield ChatCompletionResponseEventType.progress - - event_type = _event_type_generator() + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.start, + delta=TextDelta(text=""), + ) + ) + event_type = ChatCompletionResponseEventType.progress stop_reason = None toolcall_buffer = {} @@ -868,7 +867,7 @@ async def convert_openai_chat_completion_stream( if choice.delta.content: yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( - event_type=next(event_type), + event_type=event_type, delta=TextDelta(text=choice.delta.content), logprobs=_convert_openai_logprobs(logprobs), ) @@ -909,7 +908,7 @@ async def convert_openai_chat_completion_stream( toolcall_buffer["content"] += delta yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( - event_type=next(event_type), + event_type=event_type, delta=ToolCallDelta( tool_call=delta, parse_status=ToolCallParseStatus.in_progress, @@ -920,7 +919,7 @@ async def convert_openai_chat_completion_stream( else: yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( - event_type=next(event_type), + event_type=event_type, delta=TextDelta(text=choice.delta.content or ""), logprobs=_convert_openai_logprobs(logprobs), ) @@ -931,7 +930,7 @@ async def convert_openai_chat_completion_stream( toolcall_buffer["content"] += delta yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( - event_type=next(event_type), + event_type=event_type, delta=ToolCallDelta( tool_call=delta, parse_status=ToolCallParseStatus.in_progress, From 4c8a0fa8dc1fcb316de64f5ec71521192f6ff11f Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 27 Feb 2025 22:49:06 -0800 Subject: [PATCH 04/22] fix: ensure ollama embedding model is registered properly in the template --- llama_stack/distribution/routers/routing_tables.py | 9 +-------- llama_stack/templates/ollama/ollama.py | 2 +- llama_stack/templates/ollama/run.yaml | 6 ++++++ 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index c2434e517..80e9ecb7c 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -318,14 +318,7 @@ class VectorDBsRoutingTable(CommonRoutingTableImpl, VectorDBs): ) model = await self.get_object_by_identifier("model", embedding_model) if model is None: - if embedding_model == "all-MiniLM-L6-v2": - raise ValueError( - "Embeddings are now served via Inference providers. " - "Please upgrade your run.yaml to include inline::sentence-transformer as an additional inference provider. " - "See https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/together/run.yaml for an example." 
- ) - else: - raise ValueError(f"Model {embedding_model} not found") + raise ValueError(f"Model {embedding_model} not found") if model.model_type != ModelType.embedding: raise ValueError(f"Model {embedding_model} is not an embedding model") if "embedding_dimension" not in model.metadata: diff --git a/llama_stack/templates/ollama/ollama.py b/llama_stack/templates/ollama/ollama.py index 83c7b1a63..3c24a41ba 100644 --- a/llama_stack/templates/ollama/ollama.py +++ b/llama_stack/templates/ollama/ollama.py @@ -93,7 +93,7 @@ def get_distribution_template() -> DistributionTemplate: "inference": [inference_provider], "vector_io": [vector_io_provider_sqlite], }, - default_models=[inference_model], + default_models=[inference_model, embedding_model], default_tool_groups=default_tool_groups, ), "run-with-safety.yaml": RunConfigSettings( diff --git a/llama_stack/templates/ollama/run.yaml b/llama_stack/templates/ollama/run.yaml index 0c82552c6..a2428688e 100644 --- a/llama_stack/templates/ollama/run.yaml +++ b/llama_stack/templates/ollama/run.yaml @@ -90,6 +90,12 @@ models: model_id: ${env.INFERENCE_MODEL} provider_id: ollama model_type: llm +- metadata: + embedding_dimension: 384 + model_id: all-MiniLM-L6-v2 + provider_id: ollama + provider_model_id: all-minilm:latest + model_type: embedding shields: [] vector_dbs: [] datasets: [] From ece354eeddc561910c1950294b3db92d7c2ab43f Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 27 Feb 2025 22:54:34 -0800 Subject: [PATCH 05/22] test: don't hardcode faiss as provider in the tests please --- tests/client-sdk/agents/test_agents.py | 1 - tests/client-sdk/tool_runtime/test_rag_tool.py | 2 -- 2 files changed, 3 deletions(-) diff --git a/tests/client-sdk/agents/test_agents.py b/tests/client-sdk/agents/test_agents.py index 6e3dc0739..8f68699b2 100644 --- a/tests/client-sdk/agents/test_agents.py +++ b/tests/client-sdk/agents/test_agents.py @@ -458,7 +458,6 @@ def test_rag_agent(llama_stack_client, agent_config, rag_tool_name): vector_db_id=vector_db_id, embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, - provider_id="faiss", ) llama_stack_client.tool_runtime.rag_tool.insert( documents=documents, diff --git a/tests/client-sdk/tool_runtime/test_rag_tool.py b/tests/client-sdk/tool_runtime/test_rag_tool.py index 40940f1ef..e330a10f5 100644 --- a/tests/client-sdk/tool_runtime/test_rag_tool.py +++ b/tests/client-sdk/tool_runtime/test_rag_tool.py @@ -24,7 +24,6 @@ def single_entry_vector_db_registry(llama_stack_client, empty_vector_db_registry vector_db_id=vector_db_id, embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, - provider_id="faiss", ) vector_dbs = [vector_db.identifier for vector_db in llama_stack_client.vector_dbs.list()] return vector_dbs @@ -121,7 +120,6 @@ def test_vector_db_insert_from_url_and_query(llama_stack_client, empty_vector_db vector_db_id=vector_db_id, embedding_model="all-MiniLM-L6-v2", embedding_dimension=384, - provider_id="faiss", ) # list to check memory bank is successfully registered From caffafd101730850085821d89fa8ec5e4cd0fa02 Mon Sep 17 00:00:00 2001 From: ehhuang Date: Thu, 27 Feb 2025 23:05:42 -0800 Subject: [PATCH 06/22] feat: update the default system prompt for 3.2/3.3 models (#1310) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Summary: The current prompt doesn't work well and tends to overindex on tool calling. This PR is not perfect, but should be an improvement over the current prompt. We can keep iterating.
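As a rough illustration of how the tool-call counters quoted in the test plan below (`num_tool_calls`, `num_examples_with_tool_call`) can be tallied, here is a minimal sketch; the turn records and field names are made up for illustration and are not the author's eval harness:

```python
# Hypothetical per-example records: the tool calls each agent turn ended up making.
eval_turns = [
    {"question": "q1", "tool_calls": ["brave_search", "brave_search"]},
    {"question": "q2", "tool_calls": ["brave_search"]},
    {"question": "q3", "tool_calls": []},
]

# Per-example tool-call counts, mirroring the num_tool_calls list in the test plan.
num_tool_calls = [len(turn["tool_calls"]) for turn in eval_turns]
# How many examples made at least one tool call.
num_examples_with_tool_call = sum(1 for n in num_tool_calls if n > 0)

print(f"{num_tool_calls=}")               # num_tool_calls=[2, 1, 0]
print(f"{num_examples_with_tool_call=}")  # num_examples_with_tool_call=2
```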
# Test Plan: Ran on a (small) eval with 20 HotpotQA examples. With current prompt: https://gist.github.com/ehhuang/9f967e62751907165eb13781ea968f5c { │ 'basic::equality': {'accuracy': {'accuracy': 0.2, 'num_correct': 4.0, 'num_total': 20}}, │ 'F1ScoringFn': { │ │ 'f1_average': 0.25333333333333335, │ │ 'precision_average': 0.23301767676767676, │ │ 'recall_average': 0.375 │ } } num_tool_calls=[5, 5, 5, 5, 5, 5, 2, 5, 5, 5, 5, 5, 2, 2, 1, 1, 2, 1, 2, 2] num_examples_with_tool_call=20 num_examples_with_pythontag=0 ######################################################### With new prompt: https://gist.github.com/ehhuang/6e4a8ecf54db68922c2be8700056f962 { │ 'basic::equality': {'accuracy': {'accuracy': 0.25, 'num_correct': 5.0, 'num_total': 20}}, │ 'F1ScoringFn': { │ │ 'f1_average': 0.35579260478321006, │ │ 'precision_average': 0.32030238933180105, │ │ 'recall_average': 0.6091666666666666 │ } } num_tool_calls=[2, 1, 1, 5, 5, 5, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 3, 2] num_examples_with_tool_call=20 num_examples_with_pythontag=0 The answers have higher recall, and make fewer tool calls. Note that these were run with max_infer_iter=5, so the current prompt hits this limit more often, and without the limit, it sometimes goes into an infinite tool-calling loop. The data here is with 3.3-70B. Results are equally poor with either prompt on 3.2-3B (~30 recall). --- .../models/llama/llama3/prompt_templates/system_prompts.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py b/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py index 27b1a3502..74a3ae4f0 100644 --- a/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py +++ b/llama_stack/models/llama/llama3/prompt_templates/system_prompts.py @@ -226,10 +226,9 @@ class FunctionTagCustomToolGenerator(PromptTemplateGeneratorBase): class PythonListCustomToolGenerator(PromptTemplateGeneratorBase):  # noqa: N801 DEFAULT_PROMPT = textwrap.dedent( """ + You are a helpful assistant. You have access to functions, but you should only use them if they are required. You are an expert in composing functions. You are given a question and a set of possible functions. - Based on the question, you will need to make one or more function/tool calls to achieve the purpose. - If none of the function can be used, point it out. If the given question lacks the parameters required by the function, - also point it out. You should only return the function call in tools call sections. + Based on the question, you may or may not need to make one function/tool call to achieve the purpose. {{ function_description }} """.strip("\n") From 8efa53daf1fb69b34fb7a0ba25bee38d8ffed335 Mon Sep 17 00:00:00 2001 From: Hardik Shah Date: Thu, 27 Feb 2025 23:06:37 -0800 Subject: [PATCH 07/22] fix: Agent telemetry inputs/outputs should be structured (#1302) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Original telemetry outputs for agent turns look like this. Note how the output was a `str(message)`, making it difficult to read back for downstream tasks (e.g. building eval datasets): ``` { │ │ 'input': [ │ │ │ '{"role":"system","content":"You are a helpful assistant. Use search tool to answer the questions. 
"}', │ │ │ '{"role":"user","content":"Which teams played in the NBA western conference finals of 2024","context":null}' │ │ ], │ │ 'output': "content: tool_calls: [ToolCall(call_id='8b7294ec-a83f-4798-ad8f-6bed662f08b6', tool_name=, arguments={'query': 'NBA Western Conference Finals 2024 teams'})]" │ }, ``` Updated the outputs to be structured . ## Test ```python import uuid from llama_stack_client.lib.agents.agent import Agent from llama_stack_client.lib.agents.event_logger import EventLogger from llama_stack_client.types.agent_create_params import AgentConfig model_id = "meta-llama/Llama-3.1-8B-Instruct" agent_config = AgentConfig( model=model_id, instructions="You are a helpful assistant who will use the web search tools to help with answering questions.\nOnly provide final answer in short without writing full sentences. Use web search", toolgroups=["builtin::websearch"], enable_session_persistence=True, ) agent = Agent(client, agent_config) session_id = agent.create_session(uuid.uuid4().hex) response = agent.create_turn( messages=[ { "role": "user", "content": "latest news about llama stack", } ], session_id=session_id, stream=False, ) pprint(response) ``` Output: ``` Turn( │ input_messages=[UserMessage(content='latest news about llama stack', role='user', context=None)], │ output_message=CompletionMessage( │ │ content="The latest news about Llama Stack is that Meta has released Llama 3.2, which includes small and medium-sized vision LLMs (11B and 90B) and lightweight, text-only models (1B and 3B) that fit onto select edge and mobile devices. Additionally, Llama Stack distributions have been released to simplify the way developers work with Llama models in different environments. However, a critical vulnerability has been discovered in Meta's Llama-Stack, which puts AI applications at risk.", │ │ role='assistant', │ │ stop_reason='end_of_turn', │ │ tool_calls=[] │ ), │ session_id='77379546-4598-485a-b4f4-84e5da28c513', │ started_at=datetime.datetime(2025, 2, 27, 11, 2, 43, 915243, tzinfo=TzInfo(-08:00)), │ steps=[ │ │ InferenceStep( │ │ │ api_model_response=CompletionMessage( │ │ │ │ content='', │ │ │ │ role='assistant', │ │ │ │ stop_reason='end_of_turn', │ │ │ │ tool_calls=[ │ │ │ │ │ ToolCall( │ │ │ │ │ │ arguments={'query': 'latest news llama stack'}, │ │ │ │ │ │ call_id='84c0fa10-e24a-4f91-a9ff-415a9ec0bb0b', │ │ │ │ │ │ tool_name='brave_search' │ │ │ │ │ ) │ │ │ │ ] │ │ │ ), │ │ │ step_id='81c16bd3-eb00-4721-8edc-f386e07391a3', │ │ │ step_type='inference', │ │ │ turn_id='2c6b5273-4b16-404f-bed2-c0025fd63b45', │ │ │ completed_at=datetime.datetime(2025, 2, 27, 11, 2, 44, 637149, tzinfo=TzInfo(-08:00)), │ │ │ started_at=datetime.datetime(2025, 2, 27, 11, 2, 43, 915831, tzinfo=TzInfo(-08:00)) │ │ ), │ │ ToolExecutionStep( │ │ │ step_id='4782d609-a62e-45f5-8d2a-25a43db46288', │ │ │ step_type='tool_execution', │ │ │ tool_calls=[ │ │ │ │ ToolCall( │ │ │ │ │ arguments={'query': 'latest news llama stack'}, │ │ │ │ │ call_id='84c0fa10-e24a-4f91-a9ff-415a9ec0bb0b', │ │ │ │ │ tool_name='brave_search' │ │ │ │ ) │ │ │ ], │ │ │ tool_responses=[ │ │ │ │ ToolResponse( │ │ │ │ │ call_id='84c0fa10-e24a-4f91-a9ff-415a9ec0bb0b', │ │ │ │ │ content='{"query": "latest news llama stack", "top_k": [{"title": "Llama 3.2: Revol. ....... 
Hacker News.", "score": 0.6186197, "raw_content": null}]}', │ │ │ │ │ tool_name='brave_search', │ │ │ │ │ metadata=None │ │ │ │ ) │ │ │ ], │ │ │ turn_id='2c6b5273-4b16-404f-bed2-c0025fd63b45', │ │ │ completed_at=datetime.datetime(2025, 2, 27, 11, 2, 46, 272176, tzinfo=TzInfo(-08:00)), │ │ │ started_at=datetime.datetime(2025, 2, 27, 11, 2, 44, 640743, tzinfo=TzInfo(-08:00)) │ │ ), │ │ InferenceStep( │ │ │ api_model_response=CompletionMessage( │ │ │ │ content="The latest news about Llama Stack is that Meta has released Llama 3.2, which includes small and medium-sized vision LLMs (11B and 90B) and lightweight, text-only models (1B and 3B) that fit onto select edge and mobile devices. Additionally, Llama Stack distributions have been released to simplify the way developers work with Llama models in different environments. However, a critical vulnerability has been discovered in Meta's Llama-Stack, which puts AI applications at risk.", │ │ │ │ role='assistant', │ │ │ │ stop_reason='end_of_turn', │ │ │ │ tool_calls=[] │ │ │ ), │ │ │ step_id='37994419-5da3-4e84-a010-8d9b85366262', │ │ │ step_type='inference', │ │ │ turn_id='2c6b5273-4b16-404f-bed2-c0025fd63b45', │ │ │ completed_at=datetime.datetime(2025, 2, 27, 11, 2, 48, 961275, tzinfo=TzInfo(-08:00)), │ │ │ started_at=datetime.datetime(2025, 2, 27, 11, 2, 46, 273168, tzinfo=TzInfo(-08:00)) │ │ ) │ ], │ turn_id='2c6b5273-4b16-404f-bed2-c0025fd63b45', │ completed_at=datetime.datetime(2025, 2, 27, 11, 2, 48, 962318, tzinfo=TzInfo(-08:00)), │ output_attachments=[] ) ``` ## Check for Telemetry ```python agent_logs = [] for span in client.telemetry.query_spans( attribute_filters=[ {"key": "session_id", "op": "eq", "value": session_id}, ], attributes_to_return=['input', 'output'], ): agent_logs.append(span.attributes) pprint(json.loads(agent_logs[-1]['output'])) ``` ``` { │ 'content': "The latest news about Llama Stack is that Meta has released Llama 3.2, which includes small and medium-sized vision LLMs (11B and 90B) and lightweight, text-only models (1B and 3B) that fit onto select edge and mobile devices. Additionally, Llama Stack distributions have been released to simplify the way developers work with Llama models in different environments. 
However, a critical vulnerability has been discovered in Meta's Llama-Stack, which puts AI applications at risk.", │ 'tool_calls': [] } ``` --- .../agents/meta_reference/agent_instance.py | 13 ++++++++++-- .../utils/telemetry/trace_protocol.py | 20 ++++++++++++++++--- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index 5c492434f..2a93e7b3f 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -610,8 +610,17 @@ class ChatAgent(ShieldRunnerMixin): if event.stop_reason is not None: stop_reason = event.stop_reason span.set_attribute("stop_reason", stop_reason) - span.set_attribute("input", [m.model_dump_json() for m in input_messages]) - span.set_attribute("output", f"content: {content} tool_calls: {tool_calls}") + span.set_attribute( + "input", + json.dumps([json.loads(m.model_dump_json()) for m in input_messages]), + ) + output_attr = json.dumps( + { + "content": content, + "tool_calls": [json.loads(t.model_dump_json()) for t in tool_calls], + } + ) + span.set_attribute("output", output_attr) stop_reason = stop_reason or StopReason.out_of_tokens diff --git a/llama_stack/providers/utils/telemetry/trace_protocol.py b/llama_stack/providers/utils/telemetry/trace_protocol.py index 924274c42..525ade74d 100644 --- a/llama_stack/providers/utils/telemetry/trace_protocol.py +++ b/llama_stack/providers/utils/telemetry/trace_protocol.py @@ -6,6 +6,7 @@ import asyncio import inspect +import json from functools import wraps from typing import Any, AsyncGenerator, Callable, Type, TypeVar @@ -17,6 +18,10 @@ T = TypeVar("T") def serialize_value(value: Any) -> Primitive: + return str(_prepare_for_json(value)) + + +def _prepare_for_json(value: Any) -> str: """Serialize a single value into JSON-compatible format.""" if value is None: return "" @@ -25,9 +30,17 @@ def serialize_value(value: Any) -> Primitive: elif hasattr(value, "_name_"): return value._name_ elif isinstance(value, BaseModel): - return value.model_dump_json() + return json.loads(value.model_dump_json()) + elif isinstance(value, (list, tuple, set)): + return [_prepare_for_json(item) for item in value] + elif isinstance(value, dict): + return {str(k): _prepare_for_json(v) for k, v in value.items()} else: - return str(value) + try: + json.dumps(value) + return value + except Exception: + return str(value) def trace_protocol(cls: Type[T]) -> Type[T]: @@ -104,7 +117,8 @@ def trace_protocol(cls: Type[T]) -> Type[T]: result = method(self, *args, **kwargs) span.set_attribute("output", serialize_value(result)) return result - except Exception as _e: + except Exception as e: + span.set_attribute("error", str(e)) raise if is_async_gen: From 7f9b76727724c926b7efb50c523713cdb692d438 Mon Sep 17 00:00:00 2001 From: Dinesh Yeduguru Date: Thu, 27 Feb 2025 23:07:23 -0800 Subject: [PATCH 08/22] fix: check conda env name using basepath in exec.py (#1301) # What does this PR do? Check the conda env name using its basepath in exec.py. The current logic for finding the conda prefix does an `endswith` check with just the conda env name, but this will cause us to match incorrectly if there is a different conda env whose name ends with the same suffix. In my case, I had `stack` and `llama-stack` as the two conda envs.
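To illustrate the failure mode, here is a minimal sketch with made-up env paths (not the patched function itself): an `endswith` check matches any env whose path merely ends in the target name, while comparing the basename matches only the exact env.

```python
import os

# Hypothetical conda env prefixes; "stack" is a suffix of "llama-stack".
envs = ["/opt/conda/envs/llama-stack", "/opt/conda/envs/stack"]
env_name = "stack"

# Old logic: "llama-stack" also ends with "stack", so the wrong env can win.
print(next(p for p in envs if p.endswith(env_name)))             # /opt/conda/envs/llama-stack
# New logic: compare only the final path component, so only the exact env matches.
print(next(p for p in envs if os.path.basename(p) == env_name))  # /opt/conda/envs/stack
```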
## Test Plan llama stack run ~/.llama/distributions/fireworks/fireworks-run.yaml --- llama_stack/distribution/utils/exec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/distribution/utils/exec.py b/llama_stack/distribution/utils/exec.py index 82bf00e3c..aae6b35d8 100644 --- a/llama_stack/distribution/utils/exec.py +++ b/llama_stack/distribution/utils/exec.py @@ -46,7 +46,7 @@ def formulate_run_args(image_type, image_name, config, template_name) -> list: conda_env_info = json.loads(subprocess.check_output(["conda", "info", "--envs", "--json"]).decode()) envs = conda_env_info["envs"] for envpath in envs: - if envpath.endswith(env_name): + if os.path.basename(envpath) == env_name: return envpath return None From 234408f411609a9463a954e0f5cbdff58fdc5b18 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Fri, 28 Feb 2025 12:18:02 -0500 Subject: [PATCH 09/22] docs: Add link to distributions guide in quick start guide (#1326) # What does this PR do? A couple of users have asked this question so I thought it would be a good idea to add a link. --- docs/source/getting_started/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/getting_started/index.md b/docs/source/getting_started/index.md index ecef20d55..eb0dcf392 100644 --- a/docs/source/getting_started/index.md +++ b/docs/source/getting_started/index.md @@ -38,7 +38,7 @@ The API is **exactly identical** for both clients. :::{dropdown} Starting up the Llama Stack server The Llama Stack server can be configured flexibly so you can mix-and-match various providers for its individual API components -- beyond Inference, these include Vector IO, Agents, Telemetry, Evals, Post Training, etc. -To get started quickly, we provide various container images for the server component that work with different inference providers out of the box. For this guide, we will use `llamastack/distribution-ollama` as the container image. +To get started quickly, we provide various container images for the server component that work with different inference providers out of the box. For this guide, we will use `llamastack/distribution-ollama` as the container image. If you'd like to build your own image or customize the configurations, please check out [this guide](../references/index.md). Lets setup some environment variables that we will use in the rest of the guide. ```bash From 3b57d8ee882af8fdc5866acc0980ec46a4da9206 Mon Sep 17 00:00:00 2001 From: Reid <61492567+reidliu41@users.noreply.github.com> Date: Sat, 1 Mar 2025 01:27:22 +0800 Subject: [PATCH 10/22] feat: add prompt-format list (#1222) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? [Provide a short summary of what this PR does and why. Link to relevant issues if applicable.] https://github.com/meta-llama/llama-stack/blob/19ae4b35d9d22841ca14f30166d4b317554bd28d/llama_stack/cli/model/prompt_format.py#L47 Per the comment `Only Llama 3.1 and 3.2 are supported`: even within 3.1 and 3.2, not every model can be shown with `prompt-format`, so users cannot simply consult `llama model list`; the valid choices only appear after entering an invalid model. It would be nice to help users check the valid models directly: ``` llama model prompt-format -m Llama3.1-405B-Instruct:bf16-mp8 usage: llama model prompt-format [-h] [-m MODEL_NAME] [-l] llama model prompt-format: error: Llama3.1-405B-Instruct:bf16-mp8 is not a valid Model <<<<---. 
Choose one from -- Llama3.1-8B Llama3.1-70B Llama3.1-405B Llama3.1-8B-Instruct Llama3.1-70B-Instruct Llama3.1-405B-Instruct Llama3.2-1B Llama3.2-3B Llama3.2-1B-Instruct Llama3.2-3B-Instruct Llama3.2-11B-Vision Llama3.2-90B-Vision Llama3.2-11B-Vision-Instruct Llama3.2-90B-Vision-Instruct before: $ llama model prompt-format --help usage: llama model prompt-format [-h] [-m MODEL_NAME] Show llama model message formats options: -h, --help show this help message and exit -m MODEL_NAME, --model-name MODEL_NAME Model Family (llama3_1, llama3_X, etc.) Example: llama model prompt-format after: $ llama model prompt-format --help usage: llama model prompt-format [-h] [-m MODEL_NAME] [-l] Show llama model message formats options: -h, --help show this help message and exit -m MODEL_NAME, --model-name MODEL_NAME Model Family (llama3_1, llama3_X, etc.) -l, --list List the valid supported models Example: llama model prompt-format $ llama model prompt-format -l ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ Model ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩ │ Llama3.1-8B │ ├──────────────────────────────┤ │ Llama3.1-70B │ ├──────────────────────────────┤ │ Llama3.1-405B │ ├──────────────────────────────┤ │ Llama3.1-8B-Instruct │ ├──────────────────────────────┤ │ Llama3.1-70B-Instruct │ ├──────────────────────────────┤ │ Llama3.1-405B-Instruct │ ├──────────────────────────────┤ │ Llama3.2-1B │ ├──────────────────────────────┤ │ Llama3.2-3B │ ├──────────────────────────────┤ │ Llama3.2-1B-Instruct │ ├──────────────────────────────┤ │ Llama3.2-3B-Instruct │ ├──────────────────────────────┤ │ Llama3.2-11B-Vision │ ├──────────────────────────────┤ │ Llama3.2-90B-Vision │ ├──────────────────────────────┤ │ Llama3.2-11B-Vision-Instruct │ ├──────────────────────────────┤ │ Llama3.2-90B-Vision-Instruct │ └──────────────────────────────┘ ``` [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan [Describe the tests you ran to verify your changes with result summaries. 
*Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) --------- Signed-off-by: reidliu Co-authored-by: reidliu --- llama_stack/cli/model/prompt_format.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/llama_stack/cli/model/prompt_format.py b/llama_stack/cli/model/prompt_format.py index ea9596ba5..516c67634 100644 --- a/llama_stack/cli/model/prompt_format.py +++ b/llama_stack/cli/model/prompt_format.py @@ -9,6 +9,7 @@ import textwrap from io import StringIO from llama_stack.cli.subcommand import Subcommand +from llama_stack.cli.table import print_table from llama_stack.models.llama.datatypes import CoreModelId, ModelFamily, is_multimodal, model_family @@ -48,7 +49,26 @@ class ModelPromptFormat(Subcommand): supported_model_ids = [ m for m in CoreModelId if model_family(m) in {ModelFamily.llama3_1, ModelFamily.llama3_2} ] - model_str = "\n".join([m.value for m in supported_model_ids]) + + model_list = [m.value for m in supported_model_ids] + model_str = "\n".join(model_list) + + if args.list: + headers = ["Model(s)"] + rows = [] + for m in model_list: + rows.append( + [ + m, + ] + ) + print_table( + rows, + headers, + separate_rows=True, + ) + return + try: model_id = CoreModelId(args.model_name) except ValueError: From 6fa257b475d4580c7337130beb5423974282aa2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Fri, 28 Feb 2025 18:36:49 +0100 Subject: [PATCH 11/22] chore(lint): update Ruff ignores for project conventions and maintainability (#1184) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Added new ignores from flake8-bugbear (`B007`, `B008`) - Ignored `C901` (high function complexity) for now, pending review - Maintained PyTorch conventions (`N812`, `N817`) - Allowed `E731` (lambda assignments) for flexibility - Consolidated existing ignores (`E402`, `E501`, `F405`, `C408`, `N812`) - Documented rationale for each ignored rule This keeps our linting aligned with project needs while tracking potential fixes. 
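For readers who do not have the rule codes memorized, here is a small illustrative sketch (hypothetical code, not taken from this repository) of the patterns the two newly ignored flake8-bugbear rules flag:

```python
# B007: the loop control variable `index` is bound but never used in the loop body.
def first_names(rows):
    names = []
    for index, row in enumerate(rows):
        names.append(row["name"])
    return names


# B008: a function call (`list()`) is evaluated once as a default argument,
# so every call without an explicit `bucket` shares the same list object.
def append_item(item, bucket=list()):
    bucket.append(item)
    return bucket


print(first_names([{"name": "alice"}, {"name": "bob"}]))  # ['alice', 'bob']
print(append_item("a"))  # ['a']
print(append_item("b"))  # ['a', 'b'] because the default list is shared across calls
```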
Signed-off-by: Sébastien Han Signed-off-by: Sébastien Han --- llama_stack/cli/stack/_build.py | 2 +- llama_stack/cli/tests/test_stack_config.py | 2 +- llama_stack/distribution/distribution.py | 4 +- llama_stack/distribution/resolver.py | 4 +- .../distribution/ui/page/playground/rag.py | 2 +- .../agents/meta_reference/agent_instance.py | 6 +- .../inline/eval/meta_reference/eval.py | 1 - .../inference/meta_reference/inference.py | 1 - .../meta_reference/parallel_utils.py | 4 +- .../quantization/fp8_txest_disabled.py | 3 + .../recipes/lora_finetuning_single_device.py | 2 +- .../inline/scoring/braintrust/braintrust.py | 2 +- .../remote/inference/nvidia/nvidia.py | 2 +- .../remote/inference/nvidia/openai_utils.py | 4 +- llama_stack/providers/tests/eval/test_eval.py | 9 +- llama_stack/providers/tests/report.py | 90 +++++++++---------- .../providers/tests/scoring/test_scoring.py | 6 -- .../utils/inference/openai_compat.py | 6 +- .../providers/utils/kvstore/redis/redis.py | 2 +- .../utils/scoring/aggregation_utils.py | 2 +- .../utils/scoring/base_scoring_fn.py | 2 +- llama_stack/scripts/distro_codegen.py | 2 +- pyproject.toml | 39 ++------ tests/client-sdk/__init__.py | 1 + tests/client-sdk/agents/__init__.py | 1 + tests/client-sdk/conftest.py | 2 +- tests/client-sdk/inference/__init__.py | 1 + tests/client-sdk/inference/test_embedding.py | 4 +- .../inference/test_text_inference.py | 4 +- tests/client-sdk/report.py | 44 +++++---- tests/client-sdk/safety/__init__.py | 1 + tests/client-sdk/safety/test_safety.py | 2 +- tests/client-sdk/vector_io/__init__.py | 1 + 33 files changed, 113 insertions(+), 145 deletions(-) diff --git a/llama_stack/cli/stack/_build.py b/llama_stack/cli/stack/_build.py index 89db368db..baa7d2e32 100644 --- a/llama_stack/cli/stack/_build.py +++ b/llama_stack/cli/stack/_build.py @@ -141,7 +141,7 @@ def run_stack_build_command(args: argparse.Namespace) -> None: completer=WordCompleter(available_providers), complete_while_typing=True, validator=Validator.from_callable( - lambda x: x in available_providers, + lambda x: x in available_providers, # noqa: B023 - see https://github.com/astral-sh/ruff/issues/7847 error_message="Invalid provider, use to see options", ), ) diff --git a/llama_stack/cli/tests/test_stack_config.py b/llama_stack/cli/tests/test_stack_config.py index 2b7b2b210..333f86e38 100644 --- a/llama_stack/cli/tests/test_stack_config.py +++ b/llama_stack/cli/tests/test_stack_config.py @@ -112,7 +112,7 @@ def test_parse_and_maybe_upgrade_config_old_format(old_config): inference_providers = result.providers["inference"] assert len(inference_providers) == 2 - assert set(x.provider_id for x in inference_providers) == { + assert {x.provider_id for x in inference_providers} == { "remote::ollama-00", "meta-reference-01", } diff --git a/llama_stack/distribution/distribution.py b/llama_stack/distribution/distribution.py index 384e2c3c8..308081415 100644 --- a/llama_stack/distribution/distribution.py +++ b/llama_stack/distribution/distribution.py @@ -13,7 +13,7 @@ from llama_stack.providers.datatypes import Api, ProviderSpec def stack_apis() -> List[Api]: - return [v for v in Api] + return list(Api) class AutoRoutedApiInfo(BaseModel): @@ -55,7 +55,7 @@ def builtin_automatically_routed_apis() -> List[AutoRoutedApiInfo]: def providable_apis() -> List[Api]: - routing_table_apis = set(x.routing_table_api for x in builtin_automatically_routed_apis()) + routing_table_apis = {x.routing_table_api for x in builtin_automatically_routed_apis()} return [api for api in Api if api not in 
routing_table_apis and api != Api.inspect] diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index 0bc2e774c..69a096e97 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -115,8 +115,8 @@ async def resolve_impls( - flatmaps, sorts and resolves the providers in dependency order - for each API, produces either a (local, passthrough or router) implementation """ - routing_table_apis = set(x.routing_table_api for x in builtin_automatically_routed_apis()) - router_apis = set(x.router_api for x in builtin_automatically_routed_apis()) + routing_table_apis = {x.routing_table_api for x in builtin_automatically_routed_apis()} + router_apis = {x.router_api for x in builtin_automatically_routed_apis()} providers_with_specs = {} diff --git a/llama_stack/distribution/ui/page/playground/rag.py b/llama_stack/distribution/ui/page/playground/rag.py index 202c9322f..4a916321d 100644 --- a/llama_stack/distribution/ui/page/playground/rag.py +++ b/llama_stack/distribution/ui/page/playground/rag.py @@ -134,7 +134,7 @@ def rag_chat_page(): dict( name="builtin::rag/knowledge_search", args={ - "vector_db_ids": [vector_db_id for vector_db_id in selected_vector_dbs], + "vector_db_ids": list(selected_vector_dbs), }, ) ], diff --git a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py index 2a93e7b3f..4d0d8ed45 100644 --- a/llama_stack/providers/inline/agents/meta_reference/agent_instance.py +++ b/llama_stack/providers/inline/agents/meta_reference/agent_instance.py @@ -797,10 +797,10 @@ class ChatAgent(ShieldRunnerMixin): self, toolgroups_for_turn: Optional[List[AgentToolGroup]] = None ) -> Tuple[List[ToolDefinition], Dict[str, str]]: # Determine which tools to include - agent_config_toolgroups = set( - (toolgroup.name if isinstance(toolgroup, AgentToolGroupWithArgs) else toolgroup) + agent_config_toolgroups = { + toolgroup.name if isinstance(toolgroup, AgentToolGroupWithArgs) else toolgroup for toolgroup in self.agent_config.toolgroups - ) + } toolgroups_for_turn_set = ( agent_config_toolgroups if toolgroups_for_turn is None diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py index 18d408a31..48157b018 100644 --- a/llama_stack/providers/inline/eval/meta_reference/eval.py +++ b/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -86,7 +86,6 @@ class MetaReferenceEvalImpl( ) -> Job: task_def = self.benchmarks[benchmark_id] dataset_id = task_def.dataset_id - candidate = task_config.eval_candidate scoring_functions = task_def.scoring_functions dataset_def = await self.datasets_api.get_dataset(dataset_id=dataset_id) validate_dataset_schema(dataset_def.dataset_schema, get_valid_schemas(Api.eval.value)) diff --git a/llama_stack/providers/inline/inference/meta_reference/inference.py b/llama_stack/providers/inline/inference/meta_reference/inference.py index 763d9664d..516ac1ad8 100644 --- a/llama_stack/providers/inline/inference/meta_reference/inference.py +++ b/llama_stack/providers/inline/inference/meta_reference/inference.py @@ -208,7 +208,6 @@ class MetaReferenceInferenceImpl( logprobs = [] stop_reason = None - tokenizer = self.generator.formatter.tokenizer for token_result in self.generator.completion(request): tokens.append(token_result.token) if token_result.text == "<|eot_id|>": diff --git a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py 
b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py index 658267f7f..91d0445ab 100644 --- a/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py +++ b/llama_stack/providers/inline/inference/meta_reference/parallel_utils.py @@ -207,7 +207,7 @@ def maybe_parse_message(maybe_json: Optional[str]) -> Optional[ProcessingMessage return parse_message(maybe_json) except json.JSONDecodeError: return None - except ValueError as e: + except ValueError: return None @@ -352,7 +352,7 @@ class ModelParallelProcessGroup: if isinstance(obj, TaskResponse): yield obj.result - except GeneratorExit as e: + except GeneratorExit: self.request_socket.send(encode_msg(CancelSentinel())) while True: obj_json = self.request_socket.send() diff --git a/llama_stack/providers/inline/inference/meta_reference/quantization/fp8_txest_disabled.py b/llama_stack/providers/inline/inference/meta_reference/quantization/fp8_txest_disabled.py index 014a26f09..cecb66dd3 100644 --- a/llama_stack/providers/inline/inference/meta_reference/quantization/fp8_txest_disabled.py +++ b/llama_stack/providers/inline/inference/meta_reference/quantization/fp8_txest_disabled.py @@ -7,6 +7,9 @@ # Copyright (c) Meta Platforms, Inc. and affiliates. # This software may be used and distributed in accordance with the terms of the Llama 3 Community License Agreement. +# The file gets a special treatment for now? +# ruff: noqa: N803 + import unittest import torch diff --git a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py index 41387474f..c88787f18 100644 --- a/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py +++ b/llama_stack/providers/inline/post_training/torchtune/recipes/lora_finetuning_single_device.py @@ -264,7 +264,7 @@ class LoraFinetuningSingleDevice: ) self.adapter_params = get_adapter_params(model) - self._is_dora = any(["magnitude" in k for k in self.adapter_params.keys()]) + self._is_dora = any("magnitude" in k for k in self.adapter_params.keys()) set_trainable_params(model, self.adapter_params) diff --git a/llama_stack/providers/inline/scoring/braintrust/braintrust.py b/llama_stack/providers/inline/scoring/braintrust/braintrust.py index be0f023f3..a48b6b58b 100644 --- a/llama_stack/providers/inline/scoring/braintrust/braintrust.py +++ b/llama_stack/providers/inline/scoring/braintrust/braintrust.py @@ -133,7 +133,7 @@ class BraintrustScoringImpl( async def shutdown(self) -> None: ... async def list_scoring_functions(self) -> List[ScoringFn]: - scoring_fn_defs_list = [x for x in self.supported_fn_defs_registry.values()] + scoring_fn_defs_list = list(self.supported_fn_defs_registry.values()) for f in scoring_fn_defs_list: assert f.identifier.startswith("braintrust"), ( "All braintrust scoring fn must have identifier prefixed with 'braintrust'! 
" diff --git a/llama_stack/providers/remote/inference/nvidia/nvidia.py b/llama_stack/providers/remote/inference/nvidia/nvidia.py index 2ca7dd578..db9e176ee 100644 --- a/llama_stack/providers/remote/inference/nvidia/nvidia.py +++ b/llama_stack/providers/remote/inference/nvidia/nvidia.py @@ -198,7 +198,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper): tool_config: Optional[ToolConfig] = None, ) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]]: if tool_prompt_format: - warnings.warn("tool_prompt_format is not supported by NVIDIA NIM, ignoring") + warnings.warn("tool_prompt_format is not supported by NVIDIA NIM, ignoring", stacklevel=2) await check_health(self._config) # this raises errors diff --git a/llama_stack/providers/remote/inference/nvidia/openai_utils.py b/llama_stack/providers/remote/inference/nvidia/openai_utils.py index 1849fda6d..0582cb816 100644 --- a/llama_stack/providers/remote/inference/nvidia/openai_utils.py +++ b/llama_stack/providers/remote/inference/nvidia/openai_utils.py @@ -106,7 +106,7 @@ async def convert_chat_completion_request( payload.update(temperature=strategy.temperature) elif isinstance(strategy, TopKSamplingStrategy): if strategy.top_k != -1 and strategy.top_k < 1: - warnings.warn("top_k must be -1 or >= 1") + warnings.warn("top_k must be -1 or >= 1", stacklevel=2) nvext.update(top_k=strategy.top_k) elif isinstance(strategy, GreedySamplingStrategy): nvext.update(top_k=-1) @@ -168,7 +168,7 @@ def convert_completion_request( payload.update(top_p=request.sampling_params.top_p) elif request.sampling_params.strategy == "top_k": if request.sampling_params.top_k != -1 and request.sampling_params.top_k < 1: - warnings.warn("top_k must be -1 or >= 1") + warnings.warn("top_k must be -1 or >= 1", stacklevel=2) nvext.update(top_k=request.sampling_params.top_k) elif request.sampling_params.strategy == "greedy": nvext.update(top_k=-1) diff --git a/llama_stack/providers/tests/eval/test_eval.py b/llama_stack/providers/tests/eval/test_eval.py index ad80b8601..9ce3a972b 100644 --- a/llama_stack/providers/tests/eval/test_eval.py +++ b/llama_stack/providers/tests/eval/test_eval.py @@ -39,12 +39,11 @@ class Testeval: @pytest.mark.asyncio async def test_eval_evaluate_rows(self, eval_stack, inference_model, judge_model): - eval_impl, benchmarks_impl, datasetio_impl, datasets_impl, models_impl = ( + eval_impl, benchmarks_impl, datasetio_impl, datasets_impl = ( eval_stack[Api.eval], eval_stack[Api.benchmarks], eval_stack[Api.datasetio], eval_stack[Api.datasets], - eval_stack[Api.models], ) await register_dataset(datasets_impl, for_generation=True, dataset_id="test_dataset_for_eval") @@ -92,11 +91,10 @@ class Testeval: @pytest.mark.asyncio async def test_eval_run_eval(self, eval_stack, inference_model, judge_model): - eval_impl, benchmarks_impl, datasets_impl, models_impl = ( + eval_impl, benchmarks_impl, datasets_impl = ( eval_stack[Api.eval], eval_stack[Api.benchmarks], eval_stack[Api.datasets], - eval_stack[Api.models], ) await register_dataset(datasets_impl, for_generation=True, dataset_id="test_dataset_for_eval") @@ -131,11 +129,10 @@ class Testeval: @pytest.mark.asyncio async def test_eval_run_benchmark_eval(self, eval_stack, inference_model): - eval_impl, benchmarks_impl, datasets_impl, models_impl = ( + eval_impl, benchmarks_impl, datasets_impl = ( eval_stack[Api.eval], eval_stack[Api.benchmarks], eval_stack[Api.datasets], - eval_stack[Api.models], ) response = await datasets_impl.list_datasets() diff --git 
a/llama_stack/providers/tests/report.py b/llama_stack/providers/tests/report.py index febd13045..c9a7f69a8 100644 --- a/llama_stack/providers/tests/report.py +++ b/llama_stack/providers/tests/report.py @@ -18,54 +18,48 @@ from llama_stack.models.llama.sku_list import all_registered_models INFERENCE_APIS = ["chat_completion"] FUNCTIONALITIES = ["streaming", "structured_output", "tool_calling"] SUPPORTED_MODELS = { - "ollama": set( - [ - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_2_1b_instruct.value, - CoreModelId.llama3_2_1b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_3_70b_instruct.value, - CoreModelId.llama_guard_3_8b.value, - CoreModelId.llama_guard_3_1b.value, - ] - ), - "fireworks": set( - [ - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_2_1b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_3_70b_instruct.value, - CoreModelId.llama_guard_3_8b.value, - CoreModelId.llama_guard_3_11b_vision.value, - ] - ), - "together": set( - [ - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_3_70b_instruct.value, - CoreModelId.llama_guard_3_8b.value, - CoreModelId.llama_guard_3_11b_vision.value, - ] - ), + "ollama": { + CoreModelId.llama3_1_8b_instruct.value, + CoreModelId.llama3_1_8b_instruct.value, + CoreModelId.llama3_1_70b_instruct.value, + CoreModelId.llama3_1_70b_instruct.value, + CoreModelId.llama3_1_405b_instruct.value, + CoreModelId.llama3_1_405b_instruct.value, + CoreModelId.llama3_2_1b_instruct.value, + CoreModelId.llama3_2_1b_instruct.value, + CoreModelId.llama3_2_3b_instruct.value, + CoreModelId.llama3_2_3b_instruct.value, + CoreModelId.llama3_2_11b_vision_instruct.value, + CoreModelId.llama3_2_11b_vision_instruct.value, + CoreModelId.llama3_2_90b_vision_instruct.value, + CoreModelId.llama3_2_90b_vision_instruct.value, + CoreModelId.llama3_3_70b_instruct.value, + CoreModelId.llama_guard_3_8b.value, + CoreModelId.llama_guard_3_1b.value, + }, + "fireworks": { + CoreModelId.llama3_1_8b_instruct.value, + CoreModelId.llama3_1_70b_instruct.value, + CoreModelId.llama3_1_405b_instruct.value, + CoreModelId.llama3_2_1b_instruct.value, + CoreModelId.llama3_2_3b_instruct.value, + CoreModelId.llama3_2_11b_vision_instruct.value, + CoreModelId.llama3_2_90b_vision_instruct.value, + CoreModelId.llama3_3_70b_instruct.value, + CoreModelId.llama_guard_3_8b.value, + CoreModelId.llama_guard_3_11b_vision.value, + }, + "together": { + CoreModelId.llama3_1_8b_instruct.value, + CoreModelId.llama3_1_70b_instruct.value, + CoreModelId.llama3_1_405b_instruct.value, + CoreModelId.llama3_2_3b_instruct.value, + 
CoreModelId.llama3_2_11b_vision_instruct.value, + CoreModelId.llama3_2_90b_vision_instruct.value, + CoreModelId.llama3_3_70b_instruct.value, + CoreModelId.llama_guard_3_8b.value, + CoreModelId.llama_guard_3_11b_vision.value, + }, } diff --git a/llama_stack/providers/tests/scoring/test_scoring.py b/llama_stack/providers/tests/scoring/test_scoring.py index e98fd8627..d80b105f4 100644 --- a/llama_stack/providers/tests/scoring/test_scoring.py +++ b/llama_stack/providers/tests/scoring/test_scoring.py @@ -45,13 +45,11 @@ class TestScoring: scoring_functions_impl, datasetio_impl, datasets_impl, - models_impl, ) = ( scoring_stack[Api.scoring], scoring_stack[Api.scoring_functions], scoring_stack[Api.datasetio], scoring_stack[Api.datasets], - scoring_stack[Api.models], ) scoring_fns_list = await scoring_functions_impl.list_scoring_functions() provider_id = scoring_fns_list[0].provider_id @@ -102,13 +100,11 @@ class TestScoring: scoring_functions_impl, datasetio_impl, datasets_impl, - models_impl, ) = ( scoring_stack[Api.scoring], scoring_stack[Api.scoring_functions], scoring_stack[Api.datasetio], scoring_stack[Api.datasets], - scoring_stack[Api.models], ) await register_dataset(datasets_impl, for_rag=True) response = await datasets_impl.list_datasets() @@ -163,13 +159,11 @@ class TestScoring: scoring_functions_impl, datasetio_impl, datasets_impl, - models_impl, ) = ( scoring_stack[Api.scoring], scoring_stack[Api.scoring_functions], scoring_stack[Api.datasetio], scoring_stack[Api.datasets], - scoring_stack[Api.models], ) await register_dataset(datasets_impl, for_rag=True) rows = await datasetio_impl.get_rows_paginated( diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index d0fdf6385..98c2bfd2e 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -605,7 +605,7 @@ def convert_tool_call( tool_name=tool_call.function.name, arguments=json.loads(tool_call.function.arguments), ) - except Exception as e: + except Exception: return UnparseableToolCall( call_id=tool_call.id or "", tool_name=tool_call.function.name or "", @@ -876,7 +876,9 @@ async def convert_openai_chat_completion_stream( # it is possible to have parallel tool calls in stream, but # ChatCompletionResponseEvent only supports one per stream if len(choice.delta.tool_calls) > 1: - warnings.warn("multiple tool calls found in a single delta, using the first, ignoring the rest") + warnings.warn( + "multiple tool calls found in a single delta, using the first, ignoring the rest", stacklevel=2 + ) if not enable_incremental_tool_calls: yield ChatCompletionResponseStreamChunk( diff --git a/llama_stack/providers/utils/kvstore/redis/redis.py b/llama_stack/providers/utils/kvstore/redis/redis.py index f5254198b..a390ea866 100644 --- a/llama_stack/providers/utils/kvstore/redis/redis.py +++ b/llama_stack/providers/utils/kvstore/redis/redis.py @@ -36,7 +36,7 @@ class RedisKVStoreImpl(KVStore): value = await self.redis.get(key) if value is None: return None - ttl = await self.redis.ttl(key) + await self.redis.ttl(key) return value async def delete(self, key: str) -> None: diff --git a/llama_stack/providers/utils/scoring/aggregation_utils.py b/llama_stack/providers/utils/scoring/aggregation_utils.py index 35c4ee180..6686e4ade 100644 --- a/llama_stack/providers/utils/scoring/aggregation_utils.py +++ b/llama_stack/providers/utils/scoring/aggregation_utils.py @@ -32,7 +32,7 @@ def aggregate_categorical_count( 
scoring_results: List[ScoringResultRow], ) -> Dict[str, Any]: scores = [str(r["score"]) for r in scoring_results] - unique_scores = sorted(list(set(scores))) + unique_scores = sorted(set(scores)) return {"categorical_count": {s: scores.count(s) for s in unique_scores}} diff --git a/llama_stack/providers/utils/scoring/base_scoring_fn.py b/llama_stack/providers/utils/scoring/base_scoring_fn.py index a741e5baa..d28c57cc1 100644 --- a/llama_stack/providers/utils/scoring/base_scoring_fn.py +++ b/llama_stack/providers/utils/scoring/base_scoring_fn.py @@ -66,7 +66,7 @@ class RegisteredBaseScoringFn(BaseScoringFn): return self.__class__.__name__ def get_supported_scoring_fn_defs(self) -> List[ScoringFn]: - return [x for x in self.supported_fn_defs_registry.values()] + return list(self.supported_fn_defs_registry.values()) def register_scoring_fn_def(self, scoring_fn: ScoringFn) -> None: if scoring_fn.identifier in self.supported_fn_defs_registry: diff --git a/llama_stack/scripts/distro_codegen.py b/llama_stack/scripts/distro_codegen.py index 76c7283eb..92c82983e 100644 --- a/llama_stack/scripts/distro_codegen.py +++ b/llama_stack/scripts/distro_codegen.py @@ -99,7 +99,7 @@ def collect_template_dependencies(template_dir: Path) -> tuple[str | None, list[ template = template_func() normal_deps, special_deps = get_provider_dependencies(template.providers) # Combine all dependencies in order: normal deps, special deps, server deps - all_deps = sorted(list(set(normal_deps + SERVER_DEPENDENCIES))) + sorted(list(set(special_deps))) + all_deps = sorted(set(normal_deps + SERVER_DEPENDENCIES)) + sorted(set(special_deps)) return template.name, all_deps except Exception: diff --git a/pyproject.toml b/pyproject.toml index dc5659f06..893aa3330 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -123,39 +123,16 @@ select = [ "I", # isort ] ignore = [ - "E203", - "E305", - "E402", - "E501", # line too long - "E721", - "E741", - "F405", - "F841", - "C408", # ignored because we like the dict keyword argument syntax - "E302", - "W291", - "E303", - "N812", # ignored because import torch.nn.functional as F is PyTorch convention - "N817", # ignored because importing using acronyms is convention (DistributedDataParallel as DDP) - "E731", # allow usage of assigning lambda expressions + # The following ignores are desired by the project maintainers. + "E402", # Module level import not at top of file + "E501", # Line too long + "F405", # Maybe undefined or defined from star import + "C408", # Ignored because we like the dict keyword argument syntax + "N812", # Ignored because import torch.nn.functional as F is PyTorch convention + # These are the additional ones we started ignoring after moving to ruff. We should look into each one of them later. - "C901", - "C405", - "C414", - "N803", - "N999", - "C403", - "C416", - "B028", - "C419", - "C401", - "B023", - # shebang has extra meaning in fbcode lints, so I think it's not worth trying - # to line this up with executable bit - "EXE001", - "N802", # random naming hints don't need + "C901", # Complexity of the function is too high # these ignores are from flake8-bugbear; please fix! - "B007", "B008", ] diff --git a/tests/client-sdk/__init__.py b/tests/client-sdk/__init__.py index 756f351d8..ce038c94b 100644 --- a/tests/client-sdk/__init__.py +++ b/tests/client-sdk/__init__.py @@ -3,3 +3,4 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+# ruff: noqa: N999 diff --git a/tests/client-sdk/agents/__init__.py b/tests/client-sdk/agents/__init__.py index 756f351d8..ce038c94b 100644 --- a/tests/client-sdk/agents/__init__.py +++ b/tests/client-sdk/agents/__init__.py @@ -3,3 +3,4 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +# ruff: noqa: N999 diff --git a/tests/client-sdk/conftest.py b/tests/client-sdk/conftest.py index c0f4dca53..3ecf45086 100644 --- a/tests/client-sdk/conftest.py +++ b/tests/client-sdk/conftest.py @@ -117,7 +117,7 @@ def client_with_models(llama_stack_client, text_model_id, vision_model_id, embed assert len(providers) > 0, "No inference providers found" inference_providers = [p.provider_id for p in providers if p.provider_type != "inline::sentence-transformers"] - model_ids = set(m.identifier for m in client.models.list()) + model_ids = {m.identifier for m in client.models.list()} model_ids.update(m.provider_resource_id for m in client.models.list()) if text_model_id and text_model_id not in model_ids: diff --git a/tests/client-sdk/inference/__init__.py b/tests/client-sdk/inference/__init__.py index 756f351d8..ce038c94b 100644 --- a/tests/client-sdk/inference/__init__.py +++ b/tests/client-sdk/inference/__init__.py @@ -3,3 +3,4 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +# ruff: noqa: N999 diff --git a/tests/client-sdk/inference/test_embedding.py b/tests/client-sdk/inference/test_embedding.py index c46a6517f..69d35d05d 100644 --- a/tests/client-sdk/inference/test_embedding.py +++ b/tests/client-sdk/inference/test_embedding.py @@ -176,7 +176,7 @@ def test_embedding_truncation_error( ): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") - with pytest.raises(BadRequestError) as excinfo: + with pytest.raises(BadRequestError): llama_stack_client.inference.embeddings( model_id=embedding_model_id, contents=[DUMMY_LONG_TEXT], text_truncation=text_truncation ) @@ -243,7 +243,7 @@ def test_embedding_text_truncation_error( ): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") - with pytest.raises(BadRequestError) as excinfo: + with pytest.raises(BadRequestError): llama_stack_client.inference.embeddings( model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation ) diff --git a/tests/client-sdk/inference/test_text_inference.py b/tests/client-sdk/inference/test_text_inference.py index 7850d2d57..63813a1cc 100644 --- a/tests/client-sdk/inference/test_text_inference.py +++ b/tests/client-sdk/inference/test_text_inference.py @@ -139,7 +139,7 @@ def test_text_completion_log_probs_streaming(client_with_models, text_model_id, "top_k": 1, }, ) - streamed_content = [chunk for chunk in response] + streamed_content = list(response) for chunk in streamed_content: if chunk.delta: # if there's a token, we expect logprobs assert chunk.logprobs, "Logprobs should not be empty" @@ -405,7 +405,7 @@ def test_text_chat_completion_tool_calling_tools_not_in_request( assert delta.tool_call.tool_name == "get_object_namespace_list" if delta.type == "tool_call" and delta.parse_status == "failed": # expect raw message that failed to parse in tool_call - assert type(delta.tool_call) == str + assert isinstance(delta.tool_call, str) assert len(delta.tool_call) > 0 else: for tc in 
response.completion_message.tool_calls: diff --git a/tests/client-sdk/report.py b/tests/client-sdk/report.py index b946b85ba..0151b3d20 100644 --- a/tests/client-sdk/report.py +++ b/tests/client-sdk/report.py @@ -42,29 +42,27 @@ def featured_models(): SUPPORTED_MODELS = { - "ollama": set( - [ - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_8b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_70b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_1_405b_instruct.value, - CoreModelId.llama3_2_1b_instruct.value, - CoreModelId.llama3_2_1b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_3b_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_11b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_2_90b_vision_instruct.value, - CoreModelId.llama3_3_70b_instruct.value, - CoreModelId.llama_guard_3_8b.value, - CoreModelId.llama_guard_3_1b.value, - ] - ), - "tgi": set([model.core_model_id.value for model in all_registered_models() if model.huggingface_repo]), - "vllm": set([model.core_model_id.value for model in all_registered_models() if model.huggingface_repo]), + "ollama": { + CoreModelId.llama3_1_8b_instruct.value, + CoreModelId.llama3_1_8b_instruct.value, + CoreModelId.llama3_1_70b_instruct.value, + CoreModelId.llama3_1_70b_instruct.value, + CoreModelId.llama3_1_405b_instruct.value, + CoreModelId.llama3_1_405b_instruct.value, + CoreModelId.llama3_2_1b_instruct.value, + CoreModelId.llama3_2_1b_instruct.value, + CoreModelId.llama3_2_3b_instruct.value, + CoreModelId.llama3_2_3b_instruct.value, + CoreModelId.llama3_2_11b_vision_instruct.value, + CoreModelId.llama3_2_11b_vision_instruct.value, + CoreModelId.llama3_2_90b_vision_instruct.value, + CoreModelId.llama3_2_90b_vision_instruct.value, + CoreModelId.llama3_3_70b_instruct.value, + CoreModelId.llama_guard_3_8b.value, + CoreModelId.llama_guard_3_1b.value, + }, + "tgi": {model.core_model_id.value for model in all_registered_models() if model.huggingface_repo}, + "vllm": {model.core_model_id.value for model in all_registered_models() if model.huggingface_repo}, } diff --git a/tests/client-sdk/safety/__init__.py b/tests/client-sdk/safety/__init__.py index 756f351d8..ce038c94b 100644 --- a/tests/client-sdk/safety/__init__.py +++ b/tests/client-sdk/safety/__init__.py @@ -3,3 +3,4 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+# ruff: noqa: N999 diff --git a/tests/client-sdk/safety/test_safety.py b/tests/client-sdk/safety/test_safety.py index 1417a9c06..79963e4d4 100644 --- a/tests/client-sdk/safety/test_safety.py +++ b/tests/client-sdk/safety/test_safety.py @@ -42,7 +42,7 @@ def code_scanner_shield_id(available_shields): @pytest.fixture(scope="session") def model_providers(llama_stack_client): - return set([x.provider_id for x in llama_stack_client.providers.list() if x.api == "inference"]) + return {x.provider_id for x in llama_stack_client.providers.list() if x.api == "inference"} def test_unsafe_examples(llama_stack_client, llama_guard_text_shield_id): diff --git a/tests/client-sdk/vector_io/__init__.py b/tests/client-sdk/vector_io/__init__.py index 756f351d8..ce038c94b 100644 --- a/tests/client-sdk/vector_io/__init__.py +++ b/tests/client-sdk/vector_io/__init__.py @@ -3,3 +3,4 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +# ruff: noqa: N999 From 18ab1985da2cb461772bd9a4501a7803555eaa1f Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Fri, 28 Feb 2025 12:48:49 -0500 Subject: [PATCH 12/22] fix: Make remote::vllm compatible with vLLM <= v0.6.3 (#1325) # What does this PR do? This is to be consistent with OpenAI API and support vLLM <= v0.6.3 References: * https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice * https://github.com/vllm-project/vllm/pull/10000 This fixes the error when running older versions of vLLM: ``` 00:50:19.834 [START] /v1/inference/chat-completion INFO 2025-02-28 00:50:20,203 httpx:1025: HTTP Request: POST https://api-xeai-granite-3-1-8b-instruct.apps.int.stc.ai.preprod.us-east-1.aws.paas.redhat.com/v1/chat/completions "HTTP/1.1 400 Bad Request" Traceback (most recent call last): File "/usr/local/lib/python3.10/site-packages/llama_stack/distribution/server/server.py", line 235, in endpoint return await maybe_await(value) File "/usr/local/lib/python3.10/site-packages/llama_stack/distribution/server/server.py", line 201, in maybe_await return await value File "/usr/local/lib/python3.10/site-packages/llama_stack/providers/utils/telemetry/trace_protocol.py", line 89, in async_wrapper result = await method(self, *args, **kwargs) File "/usr/local/lib/python3.10/site-packages/llama_stack/distribution/routers/routers.py", line 193, in chat_completion return await provider.chat_completion(**params) File "/usr/local/lib/python3.10/site-packages/llama_stack/providers/utils/telemetry/trace_protocol.py", line 89, in async_wrapper result = await method(self, *args, **kwargs) File "/usr/local/lib/python3.10/site-packages/llama_stack/providers/remote/inference/vllm/vllm.py", line 286, in chat_completion return await self._nonstream_chat_completion(request, self.client) File "/usr/local/lib/python3.10/site-packages/llama_stack/providers/remote/inference/vllm/vllm.py", line 292, in _nonstream_chat_completion r = client.chat.completions.create(**params) File "/usr/local/lib/python3.10/site-packages/openai/_utils/_utils.py", line 279, in wrapper return func(*args, **kwargs) File "/usr/local/lib/python3.10/site-packages/openai/resources/chat/completions/completions.py", line 879, in create return self._post( File "/usr/local/lib/python3.10/site-packages/openai/_base_client.py", line 1290, in post return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls)) File "/usr/local/lib/python3.10/site-packages/openai/_base_client.py", line 967, in request return self._request( File 
"/usr/local/lib/python3.10/site-packages/openai/_base_client.py", line 1071, in _request raise self._make_status_error_from_response(err.response) from None openai.BadRequestError: Error code: 400 - {'object': 'error', 'message': "[{'type': 'value_error', 'loc': ('body',), 'msg': 'Value error, When using `tool_choice`, `tools` must be set.', 'input': {'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': 'What model are you?'}]}], 'model': 'granite-3-1-8b-instruct', 'max_tokens': 4096, 'stream': False, 'temperature': 0.0, 'tools': None, 'tool_choice': 'auto'}, 'ctx': {'error': ValueError('When using `tool_choice`, `tools` must be set.')}}]", 'type': 'BadRequestError', 'param': None, 'code': 400} INFO: 2600:1700:9d20:ac0::49:59736 - "POST /v1/inference/chat-completion HTTP/1.1" 500 Internal Server Error 00:50:20.266 [END] /v1/inference/chat-completion [StatusCode.OK] (431.99ms) ``` ## Test Plan All existing tests pass. --------- Signed-off-by: Yuan Tang --- llama_stack/providers/remote/inference/vllm/vllm.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llama_stack/providers/remote/inference/vllm/vllm.py b/llama_stack/providers/remote/inference/vllm/vllm.py index 967a3e44d..8ec23cd90 100644 --- a/llama_stack/providers/remote/inference/vllm/vllm.py +++ b/llama_stack/providers/remote/inference/vllm/vllm.py @@ -270,6 +270,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): tool_config: Optional[ToolConfig] = None, ) -> AsyncGenerator: model = await self.model_store.get_model(model_id) + # This is to be consistent with OpenAI API and support vLLM <= v0.6.3 + # References: + # * https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice + # * https://github.com/vllm-project/vllm/pull/10000 + if not tools and tool_config is not None: + tool_config.tool_choice = ToolChoice.none request = ChatCompletionRequest( model=model.provider_resource_id, messages=messages, From c91548fe07ca7ec0fa33cf82443e165594abda9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Han?= Date: Fri, 28 Feb 2025 19:01:52 +0100 Subject: [PATCH 13/22] build(container): misc improvements (#1291) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # What does this PR do? See individual commit messages. 
[//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan Apply this diff: ``` diff --git a/llama_stack/templates/ollama/build.yaml b/llama_stack/templates/ollama/build.yaml index da33b8d5..4a702f6f 100644 --- a/llama_stack/templates/ollama/build.yaml +++ b/llama_stack/templates/ollama/build.yaml @@ -28,5 +28,5 @@ distribution_spec: - remote::tavily-search - inline::code-interpreter - inline::rag-runtime - - remote::model-context-protocol + container_image: "registry.access.redhat.com/ubi9" image_type: conda ``` Then run: ``` CONTAINER_BINARY=podman llama stack build --template ollama --image-type container --image-name registry.access.redhat.com/ubi9 Containerfile created successfully in /var/folders/mq/rnm5w_7s2d3fxmtkx02knvhm0000gn/T/tmp.I7E5V6zbVI/Containerfile FROM registry.access.redhat.com/ubi9 WORKDIR /app RUN dnf -y update && dnf install -y iputils net-tools wget vim-minimal python3.11 python3.11-pip python3.11-wheel python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && dnf clean all ENV UV_SYSTEM_PYTHON=1 RUN pip install uv RUN uv pip install --no-cache ollama nltk opentelemetry-sdk aiosqlite matplotlib datasets sqlite-vec scipy chromadb-client psycopg2-binary numpy scikit-learn openai redis pandas tqdm blobfile sentencepiece aiohttp requests pillow pymongo transformers autoevals opentelemetry-exporter-otlp-proto-http pypdf chardet aiosqlite fastapi fire httpx uvicorn RUN uv pip install --no-cache llama-stack RUN pip uninstall -y uv ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--template", "ollama"] # Allows running as non-root user RUN mkdir -p /.llama /.cache RUN chmod -R g+rw /app /.llama /.cache PWD: /Users/leseb/Documents/AI/llama-stack Containerfile: /var/folders/mq/rnm5w_7s2d3fxmtkx02knvhm0000gn/T/tmp.I7E5V6zbVI/Containerfile + podman build --platform linux/arm64 -t distribution-ollama:0.1.4 -f /var/folders/mq/rnm5w_7s2d3fxmtkx02knvhm0000gn/T/tmp.I7E5V6zbVI/Containerfile . 
--progress=plain STEP 1/11: FROM registry.access.redhat.com/ubi9 STEP 2/11: WORKDIR /app --> Using cache d73dafd4caddd75bc29242a9031258fea759dc571c5bb53a64b5e6d86b3b1335 --> d73dafd4cadd STEP 3/11: RUN dnf -y update && dnf install -y iputils net-tools wget vim-minimal python3.11 python3.11-pip python3.11-wheel python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && dnf clean all --> Using cache b74ad682db149771612a3ea1e4796e0760ab8a4e07c26ad672b46a86d38178c2 --> b74ad682db14 STEP 4/11: ENV UV_SYSTEM_PYTHON=1 --> Using cache 0812a05e6576506aa2fe646cbf239d0cb504cac30a50cb5cf4dc88e49039466d --> 0812a05e6576 STEP 5/11: RUN pip install uv --> Using cache a0ce1705f87e52f70f6eb34e66f67b68ebc7c1a073f4d2a664b189cfa89a4e88 --> a0ce1705f87e STEP 6/11: RUN uv pip install --no-cache ollama nltk opentelemetry-sdk aiosqlite matplotlib datasets sqlite-vec scipy chromadb-client psycopg2-binary numpy scikit-learn openai redis pandas tqdm blobfile sentencepiece aiohttp requests pillow pymongo transformers autoevals opentelemetry-exporter-otlp-proto-http pypdf chardet aiosqlite fastapi fire httpx uvicorn Using Python 3.11.9 environment at: /usr Resolved 107 packages in 1.78s Downloading kiwisolver (1.4MiB) Downloading aiohttp (1.6MiB) Downloading grpcio (5.4MiB) Downloading nltk (1.4MiB) Downloading transformers (9.5MiB) Downloading pydantic-core (1.7MiB) Downloading lxml (4.6MiB) Downloading psycopg2-binary (2.7MiB) Downloading scipy (33.8MiB) Downloading scikit-learn (12.0MiB) Downloading tokenizers (2.8MiB) Downloading fonttools (4.6MiB) Downloading pymongo (1.3MiB) Downloading rapidfuzz (1.4MiB) Downloading sentencepiece (1.2MiB) Downloading pyarrow (38.7MiB) Downloading matplotlib (8.1MiB) Downloading pycryptodomex (2.1MiB) Downloading pillow (4.2MiB) Downloading pandas (14.9MiB) Downloading numpy (13.6MiB) Building fire==0.7.0 Downloaded sentencepiece Downloaded kiwisolver Downloaded pymongo Downloaded rapidfuzz Downloaded nltk Downloaded aiohttp Built fire==0.7.0 Downloaded pydantic-core Downloaded pycryptodomex Downloaded psycopg2-binary Downloaded tokenizers Downloaded pillow Downloaded lxml Downloaded fonttools Downloaded grpcio Downloaded matplotlib Downloaded transformers Downloaded scikit-learn Downloaded numpy Downloaded pandas Downloaded scipy Downloaded pyarrow Prepared 107 packages in 3.03s Installed 107 packages in 62ms + aiohappyeyeballs==2.4.6 + aiohttp==3.11.13 + aiosignal==1.3.2 + aiosqlite==0.21.0 + annotated-types==0.7.0 + anyio==4.8.0 + attrs==25.1.0 + autoevals==0.0.120 + backoff==2.2.1 + blobfile==3.0.0 + braintrust-core==0.0.58 + certifi==2025.1.31 + chardet==5.2.0 + charset-normalizer==3.4.1 + chevron==0.14.0 + chromadb-client==0.6.3 + click==8.1.8 + contourpy==1.3.1 + cycler==0.12.1 + datasets==3.3.2 + deprecated==1.2.18 + dill==0.3.8 + distro==1.9.0 + dnspython==2.7.0 + fastapi==0.115.8 + filelock==3.17.0 + fire==0.7.0 + fonttools==4.56.0 + frozenlist==1.5.0 + fsspec==2024.12.0 + googleapis-common-protos==1.68.0 + grpcio==1.70.0 + h11==0.14.0 + httpcore==1.0.7 + httpx==0.28.1 + huggingface-hub==0.29.1 + idna==3.10 + importlib-metadata==8.5.0 + jiter==0.8.2 + joblib==1.4.2 + jsonschema==4.23.0 + jsonschema-specifications==2024.10.1 + kiwisolver==1.4.8 + levenshtein==0.26.1 + lxml==5.3.1 + matplotlib==3.10.0 + monotonic==1.6 + multidict==6.1.0 + multiprocess==0.70.16 + nltk==3.9.1 + numpy==1.26.4 + ollama==0.4.7 + openai==1.64.0 + opentelemetry-api==1.30.0 + opentelemetry-exporter-otlp-proto-common==1.30.0 + 
opentelemetry-exporter-otlp-proto-grpc==1.30.0 + opentelemetry-exporter-otlp-proto-http==1.30.0 + opentelemetry-proto==1.30.0 + opentelemetry-sdk==1.30.0 + opentelemetry-semantic-conventions==0.51b0 + orjson==3.10.15 + overrides==7.7.0 + packaging==24.2 + pandas==2.2.3 + pillow==11.1.0 + posthog==3.16.0 + propcache==0.3.0 + protobuf==5.29.3 + psycopg2-binary==2.9.10 + pyarrow==19.0.1 + pycryptodomex==3.21.0 + pydantic==2.10.6 + pydantic-core==2.27.2 + pymongo==4.11.1 + pyparsing==3.2.1 + pypdf==5.3.0 + python-dateutil==2.9.0.post0 + pytz==2025.1 + pyyaml==6.0.2 + rapidfuzz==3.12.1 + redis==5.2.1 + referencing==0.36.2 + regex==2024.11.6 + requests==2.32.3 + rpds-py==0.23.1 + safetensors==0.5.3 + scikit-learn==1.6.1 + scipy==1.15.2 + sentencepiece==0.2.0 + six==1.17.0 + sniffio==1.3.1 + sqlite-vec==0.1.6 + starlette==0.45.3 + tenacity==9.0.0 + termcolor==2.5.0 + threadpoolctl==3.5.0 + tokenizers==0.21.0 + tqdm==4.67.1 + transformers==4.49.0 + typing-extensions==4.12.2 + tzdata==2025.1 + urllib3==2.3.0 + uvicorn==0.34.0 + wrapt==1.17.2 + xxhash==3.5.0 + yarl==1.18.3 + zipp==3.21.0 --> 5b5b823605a1 STEP 7/11: RUN uv pip install --no-cache llama-stack Using Python 3.11.9 environment at: /usr Resolved 55 packages in 1.08s Downloading setuptools (1.2MiB) Downloading pygments (1.2MiB) Downloading llama-models (1.5MiB) Downloading tiktoken (1.1MiB) Downloaded tiktoken Downloaded llama-models Downloaded pygments Downloaded setuptools Prepared 15 packages in 402ms Installed 15 packages in 15ms + jinja2==3.1.5 + llama-models==0.1.4 + llama-stack==0.1.4 + llama-stack-client==0.1.4 + markdown-it-py==3.0.0 + markupsafe==3.0.2 + mdurl==0.1.2 + prompt-toolkit==3.0.50 + pyaml==25.1.0 + pygments==2.19.1 + python-dotenv==1.0.1 + rich==13.9.4 + setuptools==75.8.2 + tiktoken==0.9.0 + wcwidth==0.2.13 --> 38a037443807 STEP 8/11: RUN pip uninstall -y uv Found existing installation: uv 0.6.3 Uninstalling uv-0.6.3: Successfully uninstalled uv-0.6.3 WARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv --> 54f749dc5ece STEP 9/11: ENTRYPOINT ["python", "-m", "llama_stack.distribution.server.server", "--template", "ollama"] --> 481c138b1982 STEP 10/11: RUN mkdir -p /.llama /.cache --> 0fc174f014a8 STEP 11/11: RUN chmod -R g+rw /app /.llama /.cache COMMIT distribution-ollama:0.1.4 --> d41b4ab4b136 Successfully tagged localhost/distribution-ollama:0.1.4 d41b4ab4b1363bfbaf6239e6f313bcb37873ef4b5f2fd816a4ee55acf2ac54d3 + set +x Success! Build Successful! ``` UBI9 container successfully builds. 
Run the container: ``` podman run d41b4ab4b1363bfbaf6239e6f313bcb37873ef4b5f2fd816a4ee55acf2ac54d3 --env INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:213: Resolved 30 providers INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: inner-inference => ollama INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: models => __routing_table__ INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: inference => __autorouted__ INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: inner-vector_io => sqlite-vec INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: inner-safety => llama-guard INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: shields => __routing_table__ INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: safety => __autorouted__ INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: vector_dbs => __routing_table__ INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: vector_io => __autorouted__ INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: inner-tool_runtime => brave-search INFO 2025-02-27 13:08:03,666 llama_stack.distribution.resolver:215: inner-tool_runtime => tavily-search ``` [//]: # (## Documentation) --------- Signed-off-by: Sébastien Han --- llama_stack/distribution/build.py | 3 - llama_stack/distribution/build_container.sh | 65 +++++++++++++-------- 2 files changed, 40 insertions(+), 28 deletions(-) diff --git a/llama_stack/distribution/build.py b/llama_stack/distribution/build.py index 2b43b8128..3d808a4a4 100644 --- a/llama_stack/distribution/build.py +++ b/llama_stack/distribution/build.py @@ -15,7 +15,6 @@ from termcolor import cprint from llama_stack.distribution.datatypes import BuildConfig, Provider from llama_stack.distribution.distribution import get_provider_registry -from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR from llama_stack.distribution.utils.exec import run_command, run_with_pty from llama_stack.distribution.utils.image_types import ImageType from llama_stack.providers.datatypes import Api @@ -103,8 +102,6 @@ def build_image( template_or_config, image_name, container_base, - str(build_file_path), - str(BUILDS_BASE_DIR / ImageType.container.value), " ".join(normal_deps), ] elif build_config.image_type == ImageType.conda.value: diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh index 08941a538..9b584a85c 100755 --- a/llama_stack/distribution/build_container.sh +++ b/llama_stack/distribution/build_container.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Copyright (c) Meta Platforms, Inc. and affiliates. # All rights reserved. 
@@ -20,26 +20,27 @@ UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT:-500} # mounting is not supported by docker buildx, so we use COPY instead USE_COPY_NOT_MOUNT=${USE_COPY_NOT_MOUNT:-} -if [ "$#" -lt 6 ]; then +if [ "$#" -lt 4 ]; then # This only works for templates - echo "Usage: $0 []" >&2 + echo "Usage: $0 []" >&2 exit 1 fi set -euo pipefail template_or_config="$1" -image_name="$2" -container_base="$3" -build_file_path="$4" -host_build_dir="$5" -pip_dependencies="$6" -special_pip_deps="${7:-}" +shift +image_name="$1" +shift +container_base="$1" +shift +pip_dependencies="$1" +shift +special_pip_deps="${1:-}" # Define color codes RED='\033[0;31m' -GREEN='\033[0;32m' NC='\033[0m' # No Color CONTAINER_BINARY=${CONTAINER_BINARY:-docker} @@ -48,7 +49,6 @@ CONTAINER_OPTS=${CONTAINER_OPTS:-} TEMP_DIR=$(mktemp -d) add_to_container() { - local input output_file="$TEMP_DIR/Containerfile" if [ -t 0 ]; then printf '%s\n' "$1" >>"$output_file" @@ -64,9 +64,9 @@ if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then FROM $container_base WORKDIR /app -RUN microdnf -y update && microdnf install -y iputils net-tools wget \ +RUN dnf -y update && dnf install -y iputils net-tools wget \ vim-minimal python3.11 python3.11-pip python3.11-wheel \ - python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && microdnf clean all + python3.11-setuptools && ln -s /bin/pip3.11 /bin/pip && ln -s /bin/python3.11 /bin/python && dnf clean all ENV UV_SYSTEM_PYTHON=1 RUN pip install uv @@ -165,6 +165,11 @@ EOF fi fi +# remove uv after installation + add_to_container << EOF +RUN pip uninstall -y uv +EOF + # if template_or_config ends with .yaml, it is not a template and we should not use the --template flag if [[ "$template_or_config" != *.yaml ]]; then add_to_container << EOF @@ -185,26 +190,31 @@ RUN mkdir -p /.llama /.cache RUN chmod -R g+rw /app /.llama /.cache EOF -printf "Containerfile created successfully in $TEMP_DIR/Containerfile\n\n" -cat $TEMP_DIR/Containerfile +printf "Containerfile created successfully in %s/Containerfile\n\n" "$TEMP_DIR" +cat "$TEMP_DIR"/Containerfile printf "\n" -mounts="" +# Start building the CLI arguments +CLI_ARGS=() + +# Read CONTAINER_OPTS and put it in an array +read -ra CLI_ARGS <<< "$CONTAINER_OPTS" + if [ "$USE_COPY_NOT_MOUNT" != "true" ]; then if [ -n "$LLAMA_STACK_DIR" ]; then - mounts="$mounts -v $(readlink -f $LLAMA_STACK_DIR):$stack_mount" + CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_DIR"):$stack_mount") fi if [ -n "$LLAMA_MODELS_DIR" ]; then - mounts="$mounts -v $(readlink -f $LLAMA_MODELS_DIR):$models_mount" + CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_MODELS_DIR"):$models_mount") fi if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then - mounts="$mounts -v $(readlink -f $LLAMA_STACK_CLIENT_DIR):$client_mount" + CLI_ARGS+=("-v" "$(readlink -f "$LLAMA_STACK_CLIENT_DIR"):$client_mount") fi fi if command -v selinuxenabled &>/dev/null && selinuxenabled; then # Disable SELinux labels -- we don't want to relabel the llama-stack source dir - CONTAINER_OPTS="$CONTAINER_OPTS --security-opt label=disable" + CLI_ARGS+=("--security-opt" "label=disable") fi # Set version tag based on PyPI version @@ -225,11 +235,11 @@ image_tag="$image_name:$version_tag" # Detect platform architecture ARCH=$(uname -m) if [ -n "$BUILD_PLATFORM" ]; then - PLATFORM="--platform $BUILD_PLATFORM" + CLI_ARGS+=("--platform $BUILD_PLATFORM") elif [ "$ARCH" = "arm64" ] || [ "$ARCH" = "aarch64" ]; then - PLATFORM="--platform linux/arm64" + CLI_ARGS+=("--platform" "linux/arm64") elif [ "$ARCH" = 
"x86_64" ]; then - PLATFORM="--platform linux/amd64" + CLI_ARGS+=("--platform" "linux/amd64") else echo "Unsupported architecture: $ARCH" exit 1 @@ -238,8 +248,13 @@ fi echo "PWD: $(pwd)" echo "Containerfile: $TEMP_DIR/Containerfile" set -x -$CONTAINER_BINARY build $CONTAINER_OPTS $PLATFORM -t $image_tag \ - -f "$TEMP_DIR/Containerfile" "." $mounts --progress=plain + +$CONTAINER_BINARY build \ + "${CLI_ARGS[@]}" \ + -t "$image_tag" \ + -f "$TEMP_DIR/Containerfile" \ + "." \ + --progress=plain # clean up tmp/configs set +x From 83dc8fbdffdc673dcdd6392ea9f1138fd0a9f412 Mon Sep 17 00:00:00 2001 From: Matthew Farrellee Date: Fri, 28 Feb 2025 12:02:36 -0600 Subject: [PATCH 14/22] test: cleanup embedding model test suite (#1322) # What does this PR do? - skip media tests for models that do not support media - skip output_dimension tests for models that do not support it - skip task_type tests for models that do not support it - provide task_type for models that require it ## Test Plan `LLAMA_STACK_BASE_URL=http://localhost:8321 pytest -v tests/client-sdk/inference/test_embedding.py --embedding-model ...` --- tests/client-sdk/inference/test_embedding.py | 65 ++++++++++++++++---- 1 file changed, 54 insertions(+), 11 deletions(-) diff --git a/tests/client-sdk/inference/test_embedding.py b/tests/client-sdk/inference/test_embedding.py index 69d35d05d..075f927f7 100644 --- a/tests/client-sdk/inference/test_embedding.py +++ b/tests/client-sdk/inference/test_embedding.py @@ -76,6 +76,25 @@ DUMMY_IMAGE_URL = ImageContentItem( ) DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image") SUPPORTED_PROVIDERS = {"remote::nvidia"} +MODELS_SUPPORTING_MEDIA = {} +MODELS_SUPPORTING_OUTPUT_DIMENSION = {"nvidia/llama-3.2-nv-embedqa-1b-v2"} +MODELS_REQUIRING_TASK_TYPE = { + "nvidia/llama-3.2-nv-embedqa-1b-v2", + "nvidia/nv-embedqa-e5-v5", + "nvidia/nv-embedqa-mistral-7b-v2", + "snowflake/arctic-embed-l", +} +MODELS_SUPPORTING_TASK_TYPE = MODELS_REQUIRING_TASK_TYPE + + +def default_task_type(model_id): + """ + Some models require a task type parameter. This provides a default value for + testing those models. 
+ """ + if model_id in MODELS_REQUIRING_TASK_TYPE: + return {"task_type": "query"} + return {} @pytest.mark.parametrize( @@ -92,7 +111,9 @@ SUPPORTED_PROVIDERS = {"remote::nvidia"} def test_embedding_text(llama_stack_client, embedding_model_id, contents, inference_provider_type): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") - response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents) + response = llama_stack_client.inference.embeddings( + model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id) + ) assert isinstance(response, EmbeddingsResponse) assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents) assert isinstance(response.embeddings[0], list) @@ -110,11 +131,14 @@ def test_embedding_text(llama_stack_client, embedding_model_id, contents, infere "list[url,string,base64,text]", ], ) -@pytest.mark.xfail(reason="Media is not supported") def test_embedding_image(llama_stack_client, embedding_model_id, contents, inference_provider_type): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") - response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=contents) + if embedding_model_id not in MODELS_SUPPORTING_MEDIA: + pytest.xfail(f"{embedding_model_id} doesn't support media") + response = llama_stack_client.inference.embeddings( + model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id) + ) assert isinstance(response, EmbeddingsResponse) assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents) assert isinstance(response.embeddings[0], list) @@ -145,7 +169,10 @@ def test_embedding_truncation( if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") response = llama_stack_client.inference.embeddings( - model_id=embedding_model_id, contents=contents, text_truncation=text_truncation + model_id=embedding_model_id, + contents=contents, + text_truncation=text_truncation, + **default_task_type(embedding_model_id), ) assert isinstance(response, EmbeddingsResponse) assert len(response.embeddings) == 1 @@ -178,26 +205,36 @@ def test_embedding_truncation_error( pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") with pytest.raises(BadRequestError): llama_stack_client.inference.embeddings( - model_id=embedding_model_id, contents=[DUMMY_LONG_TEXT], text_truncation=text_truncation + model_id=embedding_model_id, + contents=[DUMMY_LONG_TEXT], + text_truncation=text_truncation, + **default_task_type(embedding_model_id), ) -@pytest.mark.xfail(reason="Only valid for model supporting dimension reduction") def test_embedding_output_dimension(llama_stack_client, embedding_model_id, inference_provider_type): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") - base_response = llama_stack_client.inference.embeddings(model_id=embedding_model_id, contents=[DUMMY_STRING]) + if embedding_model_id not in MODELS_SUPPORTING_OUTPUT_DIMENSION: + pytest.xfail(f"{embedding_model_id} doesn't support output_dimension") + base_response = llama_stack_client.inference.embeddings( + model_id=embedding_model_id, contents=[DUMMY_STRING], 
**default_task_type(embedding_model_id) + ) test_response = llama_stack_client.inference.embeddings( - model_id=embedding_model_id, contents=[DUMMY_STRING], output_dimension=32 + model_id=embedding_model_id, + contents=[DUMMY_STRING], + **default_task_type(embedding_model_id), + output_dimension=32, ) assert len(base_response.embeddings[0]) != len(test_response.embeddings[0]) assert len(test_response.embeddings[0]) == 32 -@pytest.mark.xfail(reason="Only valid for model supporting task type") def test_embedding_task_type(llama_stack_client, embedding_model_id, inference_provider_type): if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") + if embedding_model_id not in MODELS_SUPPORTING_TASK_TYPE: + pytest.xfail(f"{embedding_model_id} doesn't support task_type") query_embedding = llama_stack_client.inference.embeddings( model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="query" ) @@ -220,7 +257,10 @@ def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_ if inference_provider_type not in SUPPORTED_PROVIDERS: pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") response = llama_stack_client.inference.embeddings( - model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation + model_id=embedding_model_id, + contents=[DUMMY_STRING], + text_truncation=text_truncation, + **default_task_type(embedding_model_id), ) assert isinstance(response, EmbeddingsResponse) assert len(response.embeddings) == 1 @@ -245,5 +285,8 @@ def test_embedding_text_truncation_error( pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet") with pytest.raises(BadRequestError): llama_stack_client.inference.embeddings( - model_id=embedding_model_id, contents=[DUMMY_STRING], text_truncation=text_truncation + model_id=embedding_model_id, + contents=[DUMMY_STRING], + text_truncation=text_truncation, + **default_task_type(embedding_model_id), ) From 5366dab31e3dfecab455a9a6c5f55cc18c7c7ae6 Mon Sep 17 00:00:00 2001 From: Reid <61492567+reidliu41@users.noreply.github.com> Date: Sat, 1 Mar 2025 02:03:45 +0800 Subject: [PATCH 15/22] docs: update build doc (#1262) # What does this PR do? [Provide a short summary of what this PR does and why. Link to relevant issues if applicable.] https://github.com/meta-llama/llama-stack/blob/55eb257459f5f891d7e570740e816eed950131b3/llama_stack/cli/stack/run.py#L22 https://github.com/meta-llama/llama-stack/blob/55eb257459f5f891d7e570740e816eed950131b3/llama_stack/cli/stack/_build.py#L103 [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan [Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) Signed-off-by: reidliu Co-authored-by: reidliu --- docs/source/distributions/building_distro.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/distributions/building_distro.md b/docs/source/distributions/building_distro.md index 9cb1a402f..20a835201 100644 --- a/docs/source/distributions/building_distro.md +++ b/docs/source/distributions/building_distro.md @@ -106,7 +106,7 @@ It would be best to start with a template and understand the structure of the co llama stack build > Enter a name for your Llama Stack (e.g. 
my-local-stack): my-stack -> Enter the image type you want your Llama Stack to be built as (container or conda): conda +> Enter the image type you want your Llama Stack to be built as (container or conda or venv): conda Llama Stack is composed of several APIs working together. Let's select the provider types (implementations) you want to use for these APIs. @@ -187,7 +187,7 @@ usage: llama stack run [-h] [--port PORT] [--image-name IMAGE_NAME] [--disable-i [--tls-certfile TLS_CERTFILE] [--image-type {conda,container,venv}] config -start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution. +Start the server for a Llama Stack Distribution. You should have already built (or downloaded) and configured the distribution. positional arguments: config Path to config file to use for the run From ea4f13cc209e1222aadfab52224a48f687a6d483 Mon Sep 17 00:00:00 2001 From: Reid <61492567+reidliu41@users.noreply.github.com> Date: Sat, 1 Mar 2025 02:07:24 +0800 Subject: [PATCH 16/22] chore: add container cmd check (#1306) # What does this PR do? [Provide a short summary of what this PR does and why. Link to relevant issues if applicable.] [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan [Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) Signed-off-by: reidliu Co-authored-by: reidliu --- llama_stack/distribution/build_container.sh | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh index 9b584a85c..68f8a0863 100755 --- a/llama_stack/distribution/build_container.sh +++ b/llama_stack/distribution/build_container.sh @@ -48,6 +48,9 @@ CONTAINER_OPTS=${CONTAINER_OPTS:-} TEMP_DIR=$(mktemp -d) +SCRIPT_DIR=$(dirname "$(readlink -f "$0")") +source "$SCRIPT_DIR/common.sh" + add_to_container() { output_file="$TEMP_DIR/Containerfile" if [ -t 0 ]; then @@ -58,6 +61,12 @@ add_to_container() { fi } +# Check if container command is available +if ! is_command_available $CONTAINER_BINARY; then + printf "${RED}Error: ${CONTAINER_BINARY} command not found. Is ${CONTAINER_BINARY} installed and in your PATH?${NC}" >&2 + exit 1 +fi + # Update and install UBI9 components if UBI9 base image is used if [[ $container_base == *"registry.access.redhat.com/ubi9"* ]]; then add_to_container << EOF @@ -212,7 +221,7 @@ if [ "$USE_COPY_NOT_MOUNT" != "true" ]; then fi fi -if command -v selinuxenabled &>/dev/null && selinuxenabled; then +if is_command_available selinuxenabled && selinuxenabled; then # Disable SELinux labels -- we don't want to relabel the llama-stack source dir CLI_ARGS+=("--security-opt" "label=disable") fi From 14c442f177591ab336414f0017d4da3d1e20a088 Mon Sep 17 00:00:00 2001 From: Reid <61492567+reidliu41@users.noreply.github.com> Date: Sat, 1 Mar 2025 02:08:05 +0800 Subject: [PATCH 17/22] chore: update cmd check (#1293) # What does this PR do? [Provide a short summary of what this PR does and why. Link to relevant issues if applicable.] [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan [Describe the tests you ran to verify your changes with result summaries. 
*Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) Signed-off-by: reidliu Co-authored-by: reidliu --- llama_stack/distribution/build_conda_env.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/distribution/build_conda_env.sh b/llama_stack/distribution/build_conda_env.sh index 31b3e1b21..1eac2ee08 100755 --- a/llama_stack/distribution/build_conda_env.sh +++ b/llama_stack/distribution/build_conda_env.sh @@ -52,7 +52,7 @@ ensure_conda_env_python310() { local python_version="3.10" # Check if conda command is available - if ! command -v conda &>/dev/null; then + if ! is_command_available conda; then printf "${RED}Error: conda command not found. Is Conda installed and in your PATH?${NC}" >&2 exit 1 fi From 66cd128ab51aff0b649c8ae59d7ec139a54913c1 Mon Sep 17 00:00:00 2001 From: Reid <61492567+reidliu41@users.noreply.github.com> Date: Sat, 1 Mar 2025 02:10:12 +0800 Subject: [PATCH 18/22] docs: update the downloaded list doc (#1266) # What does this PR do? [Provide a short summary of what this PR does and why. Link to relevant issues if applicable.] Since released the `--downloaded` option, so update the related documents. [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan [Describe the tests you ran to verify your changes with result summaries. *Provide clear instructions so the plan can be easily re-executed.*] [//]: # (## Documentation) Signed-off-by: reidliu Co-authored-by: reidliu --- .../self_hosted_distro/meta-reference-gpu.md | 27 +++++++++++++--- .../meta-reference-quantized-gpu.md | 27 +++++++++++++--- .../llama_cli_reference/download_models.md | 32 +++++++++++++++++++ .../references/llama_cli_reference/index.md | 32 +++++++++++++++++++ .../meta-reference-gpu/doc_template.md | 27 +++++++++++++--- .../doc_template.md | 27 +++++++++++++--- 6 files changed, 156 insertions(+), 16 deletions(-) diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md index b183757db..b8d1b1714 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-gpu.md @@ -41,12 +41,31 @@ The following environment variables can be configured: ## Prerequisite: Downloading Models -Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. +Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. 
``` -$ ls ~/.llama/checkpoints -Llama3.1-8B Llama3.2-11B-Vision-Instruct Llama3.2-1B-Instruct Llama3.2-90B-Vision-Instruct Llama-Guard-3-8B -Llama3.1-8B-Instruct Llama3.2-1B Llama3.2-3B-Instruct Llama-Guard-3-1B Prompt-Guard-86M +$ llama model list --downloaded +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ +┃ Model ┃ Size ┃ Modified Time ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ +│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ +└─────────────────────────────────────────┴──────────┴─────────────────────┘ ``` ## Running the Distribution diff --git a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md index 9aeb7a88b..a49175e22 100644 --- a/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md +++ b/docs/source/distributions/self_hosted_distro/meta-reference-quantized-gpu.md @@ -41,12 +41,31 @@ The following environment variables can be configured: ## Prerequisite: Downloading Models -Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. +Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. 
``` -$ ls ~/.llama/checkpoints -Llama3.1-8B Llama3.2-11B-Vision-Instruct Llama3.2-1B-Instruct Llama3.2-90B-Vision-Instruct Llama-Guard-3-8B -Llama3.1-8B-Instruct Llama3.2-1B Llama3.2-3B-Instruct Llama-Guard-3-1B Prompt-Guard-86M +$ llama model list --downloaded +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ +┃ Model ┃ Size ┃ Modified Time ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ +│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ +└─────────────────────────────────────────┴──────────┴─────────────────────┘ ``` ## Running the Distribution diff --git a/docs/source/references/llama_cli_reference/download_models.md b/docs/source/references/llama_cli_reference/download_models.md index 6c791bcb7..ca470f8c2 100644 --- a/docs/source/references/llama_cli_reference/download_models.md +++ b/docs/source/references/llama_cli_reference/download_models.md @@ -129,3 +129,35 @@ llama download --source huggingface --model-id Prompt-Guard-86M --ignore-pattern **Important:** Set your environment variable `HF_TOKEN` or pass in `--hf-token` to the command to validate your access. You can find your token at [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens). > **Tip:** Default for `llama download` is to run with `--ignore-patterns *.safetensors` since we use the `.pth` files in the `original` folder. For Llama Guard and Prompt Guard, however, we need safetensors. Hence, please run with `--ignore-patterns original` so that safetensors are downloaded and `.pth` files are ignored. 
+ +## List the downloaded models + +To list the downloaded models with the following command: +``` +llama model list --downloaded +``` + +You should see a table like this: +``` +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ +┃ Model ┃ Size ┃ Modified Time ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ +│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ +└─────────────────────────────────────────┴──────────┴─────────────────────┘ +``` diff --git a/docs/source/references/llama_cli_reference/index.md b/docs/source/references/llama_cli_reference/index.md index a43666963..8a38fc3ae 100644 --- a/docs/source/references/llama_cli_reference/index.md +++ b/docs/source/references/llama_cli_reference/index.md @@ -154,6 +154,38 @@ llama download --source huggingface --model-id Prompt-Guard-86M --ignore-pattern > **Tip:** Default for `llama download` is to run with `--ignore-patterns *.safetensors` since we use the `.pth` files in the `original` folder. For Llama Guard and Prompt Guard, however, we need safetensors. Hence, please run with `--ignore-patterns original` so that safetensors are downloaded and `.pth` files are ignored. 
+## List the downloaded models + +To list the downloaded models with the following command: +``` +llama model list --downloaded +``` + +You should see a table like this: +``` +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ +┃ Model ┃ Size ┃ Modified Time ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ +│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ +└─────────────────────────────────────────┴──────────┴─────────────────────┘ +``` + ## Understand the models The `llama model` command helps you explore the model’s interface. diff --git a/llama_stack/templates/meta-reference-gpu/doc_template.md b/llama_stack/templates/meta-reference-gpu/doc_template.md index 60556a6f3..87438fb6d 100644 --- a/llama_stack/templates/meta-reference-gpu/doc_template.md +++ b/llama_stack/templates/meta-reference-gpu/doc_template.md @@ -29,12 +29,31 @@ The following environment variables can be configured: ## Prerequisite: Downloading Models -Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. +Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. 
``` -$ ls ~/.llama/checkpoints -Llama3.1-8B Llama3.2-11B-Vision-Instruct Llama3.2-1B-Instruct Llama3.2-90B-Vision-Instruct Llama-Guard-3-8B -Llama3.1-8B-Instruct Llama3.2-1B Llama3.2-3B-Instruct Llama-Guard-3-1B Prompt-Guard-86M +$ llama model list --downloaded +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ +┃ Model ┃ Size ┃ Modified Time ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ +│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ +└─────────────────────────────────────────┴──────────┴─────────────────────┘ ``` ## Running the Distribution diff --git a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md index 2b117120c..e8dfaaf3c 100644 --- a/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md +++ b/llama_stack/templates/meta-reference-quantized-gpu/doc_template.md @@ -31,12 +31,31 @@ The following environment variables can be configured: ## Prerequisite: Downloading Models -Please make sure you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. +Please use `llama model list --downloaded` to check that you have llama model checkpoints downloaded in `~/.llama` before proceeding. See [installation guide](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) here to download the models. Run `llama model list` to see the available models to download, and `llama model download` to download the checkpoints. 
``` -$ ls ~/.llama/checkpoints -Llama3.1-8B Llama3.2-11B-Vision-Instruct Llama3.2-1B-Instruct Llama3.2-90B-Vision-Instruct Llama-Guard-3-8B -Llama3.1-8B-Instruct Llama3.2-1B Llama3.2-3B-Instruct Llama-Guard-3-1B Prompt-Guard-86M +$ llama model list --downloaded +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━┓ +┃ Model ┃ Size ┃ Modified Time ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━┩ +│ Llama3.2-1B-Instruct:int4-qlora-eo8 │ 1.53 GB │ 2025-02-26 11:22:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B │ 2.31 GB │ 2025-02-18 21:48:52 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Prompt-Guard-86M │ 0.02 GB │ 2025-02-26 11:29:28 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B-Instruct:int4-spinquant-eo8 │ 3.69 GB │ 2025-02-26 11:37:41 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-3B │ 5.99 GB │ 2025-02-18 21:51:26 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.1-8B │ 14.97 GB │ 2025-02-16 10:36:37 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama3.2-1B-Instruct:int4-spinquant-eo8 │ 1.51 GB │ 2025-02-26 11:35:02 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B │ 2.80 GB │ 2025-02-26 11:20:46 │ +├─────────────────────────────────────────┼──────────┼─────────────────────┤ +│ Llama-Guard-3-1B:int4 │ 0.43 GB │ 2025-02-26 11:33:33 │ +└─────────────────────────────────────────┴──────────┴─────────────────────┘ ``` ## Running the Distribution From 6520baebed13c1cbf4227f84d0dcd6e77bcf9ba7 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Fri, 28 Feb 2025 11:10:45 -0800 Subject: [PATCH 19/22] fix: replace eval with json decoding (#1327) # What does this PR do? - Using `eval` on server is a security risk - Replace `eval` with `json.loads` [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan ``` pytest -v -s --nbval-lax ./llama-stack/docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb ``` image [//]: # (## Documentation) --- .../providers/inline/eval/meta_reference/eval.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llama_stack/providers/inline/eval/meta_reference/eval.py b/llama_stack/providers/inline/eval/meta_reference/eval.py index 48157b018..a01f7f1f3 100644 --- a/llama_stack/providers/inline/eval/meta_reference/eval.py +++ b/llama_stack/providers/inline/eval/meta_reference/eval.py @@ -3,6 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import json from typing import Any, Dict, List, Optional from tqdm import tqdm @@ -116,7 +117,7 @@ class MetaReferenceEvalImpl( generations = [] for i, x in tqdm(enumerate(input_rows)): assert ColumnName.chat_completion_input.value in x, "Invalid input row" - input_messages = eval(str(x[ColumnName.chat_completion_input.value])) + input_messages = json.loads(x[ColumnName.chat_completion_input.value]) input_messages = [UserMessage(**x) for x in input_messages] # NOTE: only single-turn agent generation is supported. 
Create a new session for each input row @@ -158,7 +159,7 @@ class MetaReferenceEvalImpl( generations = [] for x in tqdm(input_rows): if ColumnName.completion_input.value in x: - input_content = eval(str(x[ColumnName.completion_input.value])) + input_content = json.loads(x[ColumnName.completion_input.value]) response = await self.inference_api.completion( model=candidate.model, content=input_content, @@ -166,9 +167,8 @@ class MetaReferenceEvalImpl( ) generations.append({ColumnName.generated_answer.value: response.completion_message.content}) elif ColumnName.chat_completion_input.value in x: - chat_completion_input_str = str(x[ColumnName.chat_completion_input.value]) - input_messages = eval(chat_completion_input_str) - input_messages = [UserMessage(**x) for x in input_messages] + chat_completion_input_json = json.loads(x[ColumnName.chat_completion_input.value]) + input_messages = [UserMessage(**x) for x in chat_completion_input_json] messages = [] if candidate.system_message: messages.append(candidate.system_message) From 5547ef953c304858d80b1ffa6b0f8226c3aad497 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Fri, 28 Feb 2025 11:16:12 -0800 Subject: [PATCH 20/22] feat: enhance OpenAPI spec to include Error types (#1320) # What does this PR do? An API spec must talk about Error handling. This was a pretty glaring omission so far. This PR begins to address it by adding a set of standard error responses we can attach to all our API calls. At a future point, we can add specific error types where necessary (although we should not hurry to do that; it is best done very late.) ## Test Plan Checked that Stainless SDK generation succeeds. --- docs/_static/llama-stack-spec.html | 1076 ++++++++++++++++- docs/_static/llama-stack-spec.yaml | 894 +++++++++++++- docs/openapi_generator/generate.py | 1 + docs/openapi_generator/pyopenapi/generator.py | 82 ++ docs/openapi_generator/pyopenapi/options.py | 2 + llama_stack/apis/datatypes.py | 20 + 6 files changed, 2073 insertions(+), 2 deletions(-) diff --git a/docs/_static/llama-stack-spec.html b/docs/_static/llama-stack-spec.html index 2a9f4b6f7..6b98cad90 100644 --- a/docs/_static/llama-stack-spec.html +++ b/docs/_static/llama-stack-spec.html @@ -52,6 +52,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -97,6 +109,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -128,6 +152,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -159,6 +195,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -183,6 +231,18 @@ 
"responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -219,6 +279,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -255,6 +327,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -286,6 +370,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -317,6 +413,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -362,6 +470,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -410,6 +530,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -438,6 +570,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -462,6 +606,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -492,6 +648,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -532,6 +700,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": 
"#/components/responses/DefaultError" } }, "tags": [ @@ -570,6 +750,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -608,6 +800,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -648,6 +852,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -679,6 +895,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -719,6 +947,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -773,6 +1013,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -826,6 +1078,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -863,6 +1127,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -884,6 +1160,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -921,6 +1209,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -942,6 +1242,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": 
"#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -979,6 +1291,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1016,6 +1340,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1046,6 +1382,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1084,6 +1432,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1124,6 +1484,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1154,6 +1526,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1175,6 +1559,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1205,6 +1601,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1242,6 +1650,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1279,6 +1699,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1309,6 +1741,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + 
"500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1337,6 +1781,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1373,6 +1829,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1422,6 +1890,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1443,6 +1923,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1473,6 +1965,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1487,6 +1991,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1511,6 +2027,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1542,6 +2070,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1580,6 +2120,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1609,6 +2161,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1647,6 +2211,18 @@ } } } + }, + "400": 
{ + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1685,6 +2261,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1697,6 +2285,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1728,6 +2328,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1740,6 +2352,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1771,6 +2395,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1802,6 +2438,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1821,6 +2469,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1852,6 +2512,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1873,6 +2545,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1894,6 +2578,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, 
"tags": [ @@ -1932,6 +2628,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1944,6 +2652,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1975,6 +2695,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -1994,6 +2726,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2025,6 +2769,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2037,6 +2793,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2068,6 +2836,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2098,6 +2878,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2117,6 +2909,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2141,6 +2945,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2172,6 +2988,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": 
"#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2203,6 +3031,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2234,6 +3074,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2265,6 +3117,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2296,6 +3160,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2332,6 +3208,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2391,6 +3279,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2431,6 +3331,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2455,6 +3367,18 @@ "responses": { "200": { "description": "OK" + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2486,6 +3410,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2517,6 +3453,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2548,6 +3496,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + 
"500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2579,6 +3539,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2610,6 +3582,18 @@ } } } + }, + "400": { + "$ref": "#/components/responses/BadRequest400" + }, + "429": { + "$ref": "#/components/responses/TooManyRequests429" + }, + "500": { + "$ref": "#/components/responses/InternalServerError500" + }, + "default": { + "$ref": "#/components/responses/DefaultError" } }, "tags": [ @@ -2623,6 +3607,35 @@ "jsonSchemaDialect": "https://json-schema.org/draft/2020-12/schema", "components": { "schemas": { + "Error": { + "type": "object", + "properties": { + "status": { + "type": "integer", + "description": "HTTP status code" + }, + "title": { + "type": "string", + "description": "Error title, a short summary of the error which is invariant for an error type" + }, + "detail": { + "type": "string", + "description": "Error detail, a longer human-readable description of the error" + }, + "instance": { + "type": "string", + "description": "(Optional) A URL which can be used to retrieve more information about the specific occurrence of the error" + } + }, + "additionalProperties": false, + "required": [ + "status", + "title", + "detail" + ], + "title": "Error", + "description": "Error response from the API. Roughly follows RFC 7807." + }, "AppendRowsRequest": { "type": "object", "properties": { @@ -8741,7 +9754,68 @@ "title": "VersionInfo" } }, - "responses": {} + "responses": { + "BadRequest400": { + "description": "The request was invalid or malformed", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "status": 400, + "title": "Bad Request", + "detail": "The request was invalid or malformed" + } + } + } + }, + "TooManyRequests429": { + "description": "The client has sent too many requests in a given amount of time", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "status": 429, + "title": "Too Many Requests", + "detail": "You have exceeded the rate limit. Please try again later." + } + } + } + }, + "InternalServerError500": { + "description": "The server encountered an unexpected error", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred. Our team has been notified." 
+ } + } + } + }, + "DefaultError": { + "description": "An unexpected error occurred", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Error" + }, + "example": { + "status": 0, + "title": "Error", + "detail": "An unexpected error occurred" + } + } + } + } + } }, "security": [ { diff --git a/docs/_static/llama-stack-spec.yaml b/docs/_static/llama-stack-spec.yaml index a2329e47a..13f7edc4b 100644 --- a/docs/_static/llama-stack-spec.yaml +++ b/docs/_static/llama-stack-spec.yaml @@ -19,6 +19,16 @@ paths: application/json: schema: $ref: '#/components/schemas/PaginatedRowsResult' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - DatasetIO description: '' @@ -47,6 +57,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - DatasetIO description: '' @@ -66,6 +86,16 @@ paths: application/json: schema: $ref: '#/components/schemas/BatchChatCompletionResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - BatchInference (Coming Soon) description: '' @@ -85,6 +115,16 @@ paths: application/json: schema: $ref: '#/components/schemas/BatchCompletionResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - BatchInference (Coming Soon) description: '' @@ -100,6 +140,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) description: '' @@ -124,6 +174,16 @@ paths: text/event-stream: schema: $ref: '#/components/schemas/ChatCompletionResponseStreamChunk' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inference description: >- @@ -149,6 +209,16 @@ paths: text/event-stream: schema: $ref: '#/components/schemas/CompletionResponseStreamChunk' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inference description: >- @@ -169,6 +239,16 @@ paths: application/json: schema: $ref: '#/components/schemas/AgentCreateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - 
Agents description: '' @@ -188,6 +268,16 @@ paths: application/json: schema: $ref: '#/components/schemas/AgentSessionCreateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -217,6 +307,16 @@ paths: text/event-stream: schema: $ref: '#/components/schemas/AgentTurnResponseStreamChunk' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -246,6 +346,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListBucketResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: List all buckets. @@ -263,6 +373,16 @@ paths: application/json: schema: $ref: '#/components/schemas/FileUploadResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: >- @@ -279,6 +399,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -297,6 +427,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Session' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -322,6 +462,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -345,6 +495,16 @@ paths: application/json: schema: $ref: '#/components/schemas/FileResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: >- @@ -371,6 +531,16 @@ paths: application/json: schema: $ref: '#/components/schemas/FileResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: >- @@ -401,6 +571,16 @@ paths: application/json: schema: $ref: '#/components/schemas/EmbeddingsResponse' + '400': + $ref: 
'#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inference description: >- @@ -421,6 +601,16 @@ paths: application/json: schema: $ref: '#/components/schemas/EvaluateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Eval description: '' @@ -445,6 +635,16 @@ paths: application/json: schema: $ref: '#/components/schemas/AgentStepResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -478,6 +678,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Turn' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: '' @@ -508,6 +718,16 @@ paths: oneOf: - $ref: '#/components/schemas/Benchmark' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Benchmarks description: '' @@ -528,6 +748,16 @@ paths: oneOf: - $ref: '#/components/schemas/Dataset' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Datasets description: '' @@ -541,6 +771,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Datasets description: '' @@ -561,6 +801,16 @@ paths: oneOf: - $ref: '#/components/schemas/Model' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Models description: '' @@ -574,6 +824,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Models description: '' @@ -594,6 +854,16 @@ paths: oneOf: - $ref: '#/components/schemas/ScoringFn' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ScoringFunctions description: '' @@ -614,6 +884,16 @@ 
paths: oneOf: - $ref: '#/components/schemas/Shield' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Shields description: '' @@ -632,6 +912,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Span' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -655,6 +945,16 @@ paths: application/json: schema: $ref: '#/components/schemas/QuerySpanTreeResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -679,6 +979,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Tool' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolGroups description: '' @@ -697,6 +1007,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ToolGroup' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolGroups description: '' @@ -710,6 +1030,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolGroups description: Unregister a tool group @@ -728,6 +1058,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Trace' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -748,6 +1088,16 @@ paths: oneOf: - $ref: '#/components/schemas/PostTrainingJobArtifactsResponse' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) description: '' @@ -768,6 +1118,16 @@ paths: oneOf: - $ref: '#/components/schemas/PostTrainingJobStatusResponse' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) description: '' @@ -786,6 +1146,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListPostTrainingJobsResponse' + '400': + $ref: 
'#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) description: '' @@ -801,6 +1171,16 @@ paths: oneOf: - $ref: '#/components/schemas/FileUploadResponse' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: >- @@ -822,6 +1202,16 @@ paths: oneOf: - $ref: '#/components/schemas/FileResponse' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: >- @@ -852,6 +1242,16 @@ paths: oneOf: - $ref: '#/components/schemas/VectorDB' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - VectorDBs description: '' @@ -865,6 +1265,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - VectorDBs description: '' @@ -883,6 +1293,16 @@ paths: application/json: schema: $ref: '#/components/schemas/HealthInfo' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inspect description: '' @@ -892,6 +1312,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolRuntime description: >- @@ -908,6 +1338,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - VectorIO description: '' @@ -927,6 +1367,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ToolInvocationResult' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolRuntime description: Run a tool with the given arguments @@ -948,6 +1398,16 @@ paths: oneOf: - $ref: '#/components/schemas/JobStatus' - type: 'null' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: 
'#/components/responses/DefaultError' tags: - Eval description: '' @@ -966,6 +1426,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Eval description: '' @@ -989,6 +1459,16 @@ paths: application/json: schema: $ref: '#/components/schemas/EvaluateResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Eval description: '' @@ -1012,6 +1492,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListBenchmarksResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Benchmarks description: '' @@ -1020,6 +1510,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Benchmarks description: '' @@ -1039,6 +1539,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListDatasetsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Datasets description: '' @@ -1047,6 +1557,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Datasets description: '' @@ -1066,6 +1586,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListFileResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Files (Coming Soon) description: List all files in a bucket. 
@@ -1085,6 +1615,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListModelsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Models description: '' @@ -1097,6 +1637,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Model' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Models description: '' @@ -1116,6 +1666,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListProvidersResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inspect description: '' @@ -1129,6 +1689,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListRoutesResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inspect description: '' @@ -1142,6 +1712,16 @@ paths: application/jsonl: schema: $ref: '#/components/schemas/ToolDef' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolRuntime description: '' @@ -1165,6 +1745,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListScoringFunctionsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ScoringFunctions description: '' @@ -1173,6 +1763,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ScoringFunctions description: '' @@ -1192,6 +1792,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListShieldsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Shields description: '' @@ -1204,6 +1814,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Shield' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Shields description: '' @@ -1223,6 +1843,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListToolGroupsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + 
'429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolGroups description: List tool groups with optional provider @@ -1231,6 +1861,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolGroups description: Register a tool group @@ -1250,6 +1890,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListToolsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolGroups description: List tools with optional tool group @@ -1268,6 +1918,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ListVectorDBsResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - VectorDBs description: '' @@ -1280,6 +1940,16 @@ paths: application/json: schema: $ref: '#/components/schemas/VectorDB' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - VectorDBs description: '' @@ -1295,6 +1965,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -1314,6 +1994,16 @@ paths: application/json: schema: $ref: '#/components/schemas/PostTrainingJob' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) description: '' @@ -1333,6 +2023,16 @@ paths: application/json: schema: $ref: '#/components/schemas/RAGQueryResult' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - ToolRuntime description: >- @@ -1353,6 +2053,16 @@ paths: application/json: schema: $ref: '#/components/schemas/QueryChunksResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - VectorIO description: '' @@ -1372,6 +2082,16 @@ paths: application/json: schema: $ref: '#/components/schemas/QuerySpansResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + 
#/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -1391,6 +2111,16 @@ paths: application/json: schema: $ref: '#/components/schemas/QueryTracesResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -1415,6 +2145,16 @@ paths: text/event-stream: schema: $ref: '#/components/schemas/AgentTurnResponseStreamChunk' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Agents description: >- @@ -1457,6 +2197,16 @@ paths: application/json: schema: $ref: '#/components/schemas/Job' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Eval description: '' @@ -1481,6 +2231,16 @@ paths: application/json: schema: $ref: '#/components/schemas/RunShieldResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Safety description: '' @@ -1496,6 +2256,16 @@ paths: responses: '200': description: OK + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Telemetry description: '' @@ -1515,6 +2285,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ScoreResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Scoring description: '' @@ -1534,6 +2314,16 @@ paths: application/json: schema: $ref: '#/components/schemas/ScoreBatchResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Scoring description: '' @@ -1553,6 +2343,16 @@ paths: application/json: schema: $ref: '#/components/schemas/PostTrainingJob' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - PostTraining (Coming Soon) description: '' @@ -1572,6 +2372,16 @@ paths: application/json: schema: $ref: '#/components/schemas/SyntheticDataGenerationResponse' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - SyntheticDataGeneration (Coming 
Soon) description: '' @@ -1591,6 +2401,16 @@ paths: application/json: schema: $ref: '#/components/schemas/VersionInfo' + '400': + $ref: '#/components/responses/BadRequest400' + '429': + $ref: >- + #/components/responses/TooManyRequests429 + '500': + $ref: >- + #/components/responses/InternalServerError500 + default: + $ref: '#/components/responses/DefaultError' tags: - Inspect description: '' @@ -1599,6 +2419,34 @@ jsonSchemaDialect: >- https://json-schema.org/draft/2020-12/schema components: schemas: + Error: + type: object + properties: + status: + type: integer + description: HTTP status code + title: + type: string + description: >- + Error title, a short summary of the error which is invariant for an error + type + detail: + type: string + description: >- + Error detail, a longer human-readable description of the error + instance: + type: string + description: >- + (Optional) A URL which can be used to retrieve more information about + the specific occurrence of the error + additionalProperties: false + required: + - status + - title + - detail + title: Error + description: >- + Error response from the API. Roughly follows RFC 7807. AppendRowsRequest: type: object properties: @@ -5626,7 +6474,51 @@ components: required: - version title: VersionInfo - responses: {} + responses: + BadRequest400: + description: The request was invalid or malformed + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + example: + status: 400 + title: Bad Request + detail: The request was invalid or malformed + TooManyRequests429: + description: >- + The client has sent too many requests in a given amount of time + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + example: + status: 429 + title: Too Many Requests + detail: >- + You have exceeded the rate limit. Please try again later. + InternalServerError500: + description: >- + The server encountered an unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + example: + status: 500 + title: Internal Server Error + detail: >- + An unexpected error occurred. Our team has been notified. 
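All of the 400/429/500 responses added above (and the default fallback that follows) point at the same RFC 7807-style `Error` shape (`status`, `title`, `detail`, optional `instance`), which later in this patch is also exposed as a pydantic model in `llama_stack/apis/datatypes.py`. As a rough sketch of what a client could do with such a body — the `Error` class below simply mirrors that schema, and `handle_error_body` plus the sample payload are illustrative, not part of the patch:

```python
import json
from typing import Optional

from pydantic import BaseModel


class Error(BaseModel):
    """Mirror of the Error schema above: RFC 7807-ish status/title/detail/instance."""

    status: int
    title: str
    detail: str
    instance: Optional[str] = None


def handle_error_body(raw_body: str) -> None:
    # Illustrative helper: validate a JSON error body against the model,
    # then branch on the HTTP status it reports.
    err = Error(**json.loads(raw_body))
    if err.status == 429:
        print(f"rate limited: {err.detail}")  # back off and retry later
    elif err.status >= 500:
        print(f"server error: {err.title} - {err.detail}")
    else:
        print(f"request failed ({err.status}): {err.detail}")


handle_error_body(
    '{"status": 429, "title": "Too Many Requests",'
    ' "detail": "You have exceeded the rate limit. Please try again later."}'
)
```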
+ DefaultError: + description: An unexpected error occurred + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + example: + status: 0 + title: Error + detail: An unexpected error occurred security: - Default: [] tags: diff --git a/docs/openapi_generator/generate.py b/docs/openapi_generator/generate.py index dcbee7d2f..a2553f905 100644 --- a/docs/openapi_generator/generate.py +++ b/docs/openapi_generator/generate.py @@ -55,6 +55,7 @@ def main(output_dir: str): a set of endpoints and their corresponding interfaces that are tailored to best leverage Llama Models.""", ), + include_standard_error_responses=True, ), ) diff --git a/docs/openapi_generator/pyopenapi/generator.py b/docs/openapi_generator/pyopenapi/generator.py index 4220cfc05..91f32e6c8 100644 --- a/docs/openapi_generator/pyopenapi/generator.py +++ b/docs/openapi_generator/pyopenapi/generator.py @@ -10,6 +10,7 @@ import typing from dataclasses import make_dataclass from typing import Any, Dict, Set, Union +from llama_stack.apis.datatypes import Error from llama_stack.strong_typing.core import JsonType from llama_stack.strong_typing.docstring import Docstring, parse_type from llama_stack.strong_typing.inspection import ( @@ -434,6 +435,75 @@ class Generator: ) self.schema_builder = SchemaBuilder(schema_generator) self.responses = {} + + # Create standard error responses + self._create_standard_error_responses() + + def _create_standard_error_responses(self) -> None: + """ + Creates standard error responses that can be reused across operations. + These will be added to the components.responses section of the OpenAPI document. + """ + # Get the Error schema + error_schema = self.schema_builder.classdef_to_ref(Error) + + # Create standard error responses + self.responses["BadRequest400"] = Response( + description="The request was invalid or malformed", + content={ + "application/json": MediaType( + schema=error_schema, + example={ + "status": 400, + "title": "Bad Request", + "detail": "The request was invalid or malformed", + } + ) + } + ) + + self.responses["TooManyRequests429"] = Response( + description="The client has sent too many requests in a given amount of time", + content={ + "application/json": MediaType( + schema=error_schema, + example={ + "status": 429, + "title": "Too Many Requests", + "detail": "You have exceeded the rate limit. Please try again later.", + } + ) + } + ) + + self.responses["InternalServerError500"] = Response( + description="The server encountered an unexpected error", + content={ + "application/json": MediaType( + schema=error_schema, + example={ + "status": 500, + "title": "Internal Server Error", + "detail": "An unexpected error occurred. 
Our team has been notified.", + } + ) + } + ) + + # Add a default error response for any unhandled error cases + self.responses["DefaultError"] = Response( + description="An unexpected error occurred", + content={ + "application/json": MediaType( + schema=error_schema, + example={ + "status": 0, + "title": "Error", + "detail": "An unexpected error occurred", + } + ) + } + ) def _build_type_tag(self, ref: str, schema: Schema) -> Tag: # Don't include schema definition in the tag description because for one, @@ -649,6 +719,18 @@ class Generator: responses.update(response_builder.build_response(response_options)) assert len(responses.keys()) > 0, f"No responses found for {op.name}" + + # Add standard error response references + if self.options.include_standard_error_responses: + if "400" not in responses: + responses["400"] = ResponseRef("BadRequest400") + if "429" not in responses: + responses["429"] = ResponseRef("TooManyRequests429") + if "500" not in responses: + responses["500"] = ResponseRef("InternalServerError500") + if "default" not in responses: + responses["default"] = ResponseRef("DefaultError") + if op.event_type is not None: builder = ContentBuilder(self.schema_builder) callbacks = { diff --git a/docs/openapi_generator/pyopenapi/options.py b/docs/openapi_generator/pyopenapi/options.py index f80da453b..edc861ad5 100644 --- a/docs/openapi_generator/pyopenapi/options.py +++ b/docs/openapi_generator/pyopenapi/options.py @@ -35,6 +35,7 @@ class Options: :param error_wrapper: True if errors are encapsulated in an error object wrapper. :param property_description_fun: Custom transformation function to apply to class property documentation strings. :param captions: User-defined captions for sections such as "Operations" or "Types", and (if applicable) groups of extra types. + :param include_standard_error_responses: Whether to include standard error responses (400, 429, 500, 503) in all operations. """ server: Server @@ -52,6 +53,7 @@ class Options: error_wrapper: bool = False property_description_fun: Optional[Callable[[type, str, str], str]] = None captions: Optional[Dict[str, str]] = None + include_standard_error_responses: bool = True default_captions: ClassVar[Dict[str, str]] = { "Operations": "Operations", diff --git a/llama_stack/apis/datatypes.py b/llama_stack/apis/datatypes.py index 6df93052c..842a2b63d 100644 --- a/llama_stack/apis/datatypes.py +++ b/llama_stack/apis/datatypes.py @@ -5,6 +5,9 @@ # the root directory of this source tree. from enum import Enum +from typing import Optional + +from pydantic import BaseModel from llama_stack.schema_utils import json_schema_type @@ -33,3 +36,20 @@ class Api(Enum): # built-in API inspect = "inspect" + + +@json_schema_type +class Error(BaseModel): + """ + Error response from the API. Roughly follows RFC 7807. + + :param status: HTTP status code + :param title: Error title, a short summary of the error which is invariant for an error type + :param detail: Error detail, a longer human-readable description of the error + :param instance: (Optional) A URL which can be used to retrieve more information about the specific occurrence of the error + """ + + status: int + title: str + detail: str + instance: Optional[str] = None From 15f69e75ffaf07c79edf1cdcef1c31d0b67bbc3d Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Fri, 28 Feb 2025 11:25:23 -0800 Subject: [PATCH 21/22] fix: replace eval with json decoding for format_adapter (#1328) # What does this PR do? 
- using `eval` is a security risk [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan - see https://github.com/meta-llama/llama-stack/pull/1327 cc @SLR722 we will need to update the corresponding dataset via ```python def update_to_json_str(): dataset = datasets.load_dataset(...) processed_dataset = dataset[split].map( lambda x: { "column": json.dumps(eval(x["column"])) } ) processed_dataset.push_to_hub(...) ``` [//]: # (## Documentation) --- .../post_training/torchtune/datasets/format_adapter.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py b/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py index 884977803..6b607f1c7 100644 --- a/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py +++ b/llama_stack/providers/inline/post_training/torchtune/datasets/format_adapter.py @@ -10,16 +10,19 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import json from typing import Any, Mapping from llama_stack.providers.utils.common.data_schema_validator import ColumnName -def llama_stack_instruct_to_torchtune_instruct(sample: Mapping[str, Any]) -> Mapping[str, Any]: +def llama_stack_instruct_to_torchtune_instruct( + sample: Mapping[str, Any], +) -> Mapping[str, Any]: assert ColumnName.chat_completion_input.value in sample and ColumnName.expected_answer.value in sample, ( "Invalid input row" ) - input_messages = eval(str(sample[ColumnName.chat_completion_input.value])) + input_messages = json.loads(sample[ColumnName.chat_completion_input.value]) assert len(input_messages) == 1, "llama stack intruct dataset format only supports 1 user message" input_message = input_messages[0] @@ -37,7 +40,7 @@ def llama_stack_instruct_to_torchtune_instruct(sample: Mapping[str, Any]) -> Map def llama_stack_chat_to_torchtune_chat(sample: Mapping[str, Any]) -> Mapping[str, Any]: assert ColumnName.dialog.value in sample, "Invalid input row" role_map = {"user": "human", "assistant": "gpt"} - dialog = eval(str(sample[ColumnName.dialog.value])) + dialog = json.loads(sample[ColumnName.dialog.value]) assert len(dialog) > 1, "dialog must have at least 2 messagse" roles = [] From 82fa0803faee41ae0e74a5e97066cdb78bfee294 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Fri, 28 Feb 2025 12:29:50 -0800 Subject: [PATCH 22/22] chore: refactor client tool in test (#1331) # What does this PR do? Use @client_tool decorator instead of ClientTool [//]: # (If resolving an issue, uncomment and update the line below) [//]: # (Closes #[issue-number]) ## Test Plan ``` LLAMA_STACK_CONFIG=fireworks pytest -v tests/client-sdk/agents/test_agents.py --inference-model "meta-llama/Llama-3.3-70B-Instruct" ``` image [//]: # (## Documentation) --- tests/client-sdk/agents/test_agents.py | 82 ++++++-------------------- 1 file changed, 18 insertions(+), 64 deletions(-) diff --git a/tests/client-sdk/agents/test_agents.py b/tests/client-sdk/agents/test_agents.py index 8f68699b2..9690a8139 100644 --- a/tests/client-sdk/agents/test_agents.py +++ b/tests/client-sdk/agents/test_agents.py @@ -4,20 +4,15 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
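As background on the `eval` → `json.loads` swap in #1328 above: `json.loads` only ever yields data, whereas `eval` executes whatever string it is handed, so a crafted dataset row could run arbitrary code inside the training job. The snippet below is a standalone illustration (the sample rows are made up, not taken from any real dataset); it also hints at why the dataset needs the `json.dumps(eval(...))` re-encoding mentioned in that PR description, presumably because existing rows are stored as Python reprs that are not valid JSON.

```python
import json

python_repr = "[{'role': 'user', 'content': 'hi'}]"  # how a repr-encoded row might look
json_row = '[{"role": "user", "content": "hi"}]'     # the same row as proper JSON

print(json.loads(json_row))  # plain data in, plain data out

try:
    json.loads(python_repr)  # single quotes are not valid JSON ...
except json.JSONDecodeError as exc:
    print(f"rejected non-JSON row: {exc}")  # ... so the row is rejected, nothing is executed

# eval, by contrast, runs whatever the row contains, e.g. a row holding
# "__import__('os').system('...')" would execute during dataset loading.
```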
-import json -from typing import Dict, List from uuid import uuid4 import pytest from llama_stack_client.lib.agents.agent import Agent -from llama_stack_client.lib.agents.client_tool import ClientTool +from llama_stack_client.lib.agents.client_tool import client_tool from llama_stack_client.lib.agents.event_logger import EventLogger -from llama_stack_client.types import ToolResponseMessage from llama_stack_client.types.agents.turn_create_params import Document as AgentDocument from llama_stack_client.types.memory_insert_params import Document -from llama_stack_client.types.shared.completion_message import CompletionMessage from llama_stack_client.types.shared_params.agent_config import AgentConfig, ToolConfig -from llama_stack_client.types.tool_def_param import Parameter from llama_stack.apis.agents.agents import ( AgentConfig as Server__AgentConfig, @@ -27,63 +22,22 @@ from llama_stack.apis.agents.agents import ( ) -class TestClientTool(ClientTool): - """Tool to give boiling point of a liquid - Returns the correct value for polyjuice in Celcius and Fahrenheit - and returns -1 for other liquids +@client_tool +def get_boiling_point(liquid_name: str, celcius: bool = True) -> int: """ + Returns the boiling point of a liquid in Celcius or Fahrenheit - def run(self, messages: List[CompletionMessage]) -> List[ToolResponseMessage]: - assert len(messages) == 1, "Expected single message" - - message = messages[0] - - tool_call = message.tool_calls[0] - - try: - response = self.run_impl(**tool_call.arguments) - response_str = json.dumps(response, ensure_ascii=False) - except Exception as e: - response_str = f"Error when running tool: {e}" - - message = ToolResponseMessage( - role="tool", - call_id=tool_call.call_id, - tool_name=tool_call.tool_name, - content=response_str, - ) - return message - - def get_name(self) -> str: - return "get_boiling_point" - - def get_description(self) -> str: - return "Get the boiling point of imaginary liquids (eg. 
polyjuice)" - - def get_params_definition(self) -> Dict[str, Parameter]: - return { - "liquid_name": Parameter( - name="liquid_name", - parameter_type="string", - description="The name of the liquid", - required=True, - ), - "celcius": Parameter( - name="celcius", - parameter_type="boolean", - description="Whether to return the boiling point in Celcius", - required=False, - ), - } - - def run_impl(self, liquid_name: str, celcius: bool = True) -> int: - if liquid_name.lower() == "polyjuice": - if celcius: - return -100 - else: - return -212 + :param liquid_name: The name of the liquid + :param celcius: Whether to return the boiling point in Celcius + :return: The boiling point of the liquid in Celcius or Fahrenheit + """ + if liquid_name.lower() == "polyjuice": + if celcius: + return -100 else: - return -1 + return -212 + else: + return -1 @pytest.fixture(scope="session") @@ -298,7 +252,7 @@ def test_code_interpreter_for_attachments(llama_stack_client, agent_config): def test_custom_tool(llama_stack_client, agent_config): - client_tool = TestClientTool() + client_tool = get_boiling_point agent_config = { **agent_config, "toolgroups": ["builtin::websearch"], @@ -326,7 +280,7 @@ def test_custom_tool(llama_stack_client, agent_config): def test_tool_choice(llama_stack_client, agent_config): def run_agent(tool_choice): - client_tool = TestClientTool() + client_tool = get_boiling_point test_agent_config = { **agent_config, @@ -362,7 +316,7 @@ def test_tool_choice(llama_stack_client, agent_config): # TODO: fix this flaky test def xtest_override_system_message_behavior(llama_stack_client, agent_config): - client_tool = TestClientTool() + client_tool = get_boiling_point agent_config = { **agent_config, "instructions": "You are a pirate", @@ -586,7 +540,7 @@ def test_rag_and_code_agent(llama_stack_client, agent_config): def test_create_turn_response(llama_stack_client, agent_config): - client_tool = TestClientTool() + client_tool = get_boiling_point agent_config = { **agent_config, "input_shields": [],