From 76b354a08193b0f7d2c945199388b12363da7ea7 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 16 Sep 2024 17:34:07 -0700 Subject: [PATCH] Codemod from llama_toolchain -> llama_stack - added providers/registry - cleaned up api/ subdirectories and moved impls away - restructured api/api.py - from llama_stack.apis. import foo should work now - update imports to do llama_stack.apis. - update many other imports - added __init__, fixed some registry imports - updated registry imports - create_agentic_system -> create_agent - AgenticSystem -> Agent --- docs/cli_reference.md | 2 +- docs/getting_started.md | 4 +- llama_stack/apis/agents/__init__.py | 7 + .../api/api.py => apis/agents/agents.py} | 148 ++++++++---------- .../{agentic_system => apis/agents}/client.py | 42 +++-- llama_stack/apis/batch_inference/__init__.py | 7 + .../batch_inference/batch_inference.py} | 2 +- llama_stack/apis/dataset/__init__.py | 7 + .../api/api.py => apis/dataset/dataset.py} | 0 .../{dataset/api => apis/evals}/__init__.py | 2 +- .../api/api.py => apis/evals/evals.py} | 2 +- llama_stack/apis/inference/__init__.py | 7 + llama_stack/{ => apis}/inference/client.py | 7 +- .../api.py => apis/inference/inference.py} | 0 llama_stack/apis/memory/__init__.py | 7 + llama_stack/{ => apis}/memory/client.py | 4 +- .../api/api.py => apis/memory/memory.py} | 0 llama_stack/apis/models/__init__.py | 7 + .../endpoints.py => apis/models/models.py} | 0 llama_stack/apis/post_training/__init__.py | 7 + .../post_training/post_training.py} | 2 +- llama_stack/apis/reward_scoring/__init__.py | 7 + .../reward_scoring/reward_scoring.py} | 0 llama_stack/apis/safety/__init__.py | 7 + llama_stack/{ => apis}/safety/client.py | 6 +- .../api/api.py => apis/safety/safety.py} | 0 .../synthetic_data_generation/__init__.py | 7 + .../synthetic_data_generation.py} | 2 +- llama_stack/apis/telemetry/__init__.py | 7 + .../api.py => apis/telemetry/telemetry.py} | 0 llama_stack/cli/llama.py | 10 -- .../conda/local-conda-example-build.yaml | 2 +- .../local-fireworks-conda-example-build.yaml | 2 +- .../local-ollama-conda-example-build.yaml | 2 +- .../conda/local-tgi-conda-example-build.yaml | 2 +- .../local-together-conda-example-build.yaml | 2 +- .../docker/local-docker-example-build.yaml | 2 +- llama_stack/core/datatypes.py | 2 +- llama_stack/core/distribution.py | 14 +- llama_stack/core/server.py | 2 +- llama_stack/inference/api/__init__.py | 7 - llama_stack/memory/api/__init__.py | 7 - llama_stack/post_training/api/__init__.py | 7 - .../{agentic_system => providers}/__init__.py | 0 .../adapters}/__init__.py | 0 .../adapters}/inference/__init__.py | 0 .../adapters/inference}/fireworks/__init__.py | 0 .../adapters/inference}/fireworks/config.py | 0 .../inference}/fireworks/fireworks.py | 11 +- .../adapters/inference}/ollama/__init__.py | 0 .../adapters/inference}/ollama/ollama.py | 9 +- .../adapters/inference}/tgi/__init__.py | 0 .../adapters/inference}/tgi/config.py | 0 .../adapters/inference}/tgi/tgi.py | 8 +- .../adapters/inference}/together/__init__.py | 0 .../adapters/inference}/together/config.py | 0 .../adapters/inference}/together/together.py | 9 +- .../adapters/memory}/__init__.py | 0 .../adapters/memory}/chroma/__init__.py | 0 .../adapters/memory}/chroma/chroma.py | 7 +- .../adapters/memory}/pgvector/__init__.py | 0 .../adapters/memory}/pgvector/config.py | 0 .../adapters/memory}/pgvector/pgvector.py | 4 +- .../{memory => providers/impls}/__init__.py | 0 .../impls/meta_reference/agents}/__init__.py | 4 +- .../meta_reference/agents}/agent_instance.py | 118 +++++++------- .../impls/meta_reference/agents/agents.py} | 26 +-- .../impls/meta_reference/agents}/config.py | 0 .../agents}/rag/context_retriever.py | 4 +- .../impls/meta_reference/agents}/safety.py | 4 +- .../meta_reference/inference}/__init__.py | 0 .../impls/meta_reference/inference}/config.py | 4 +- .../meta_reference/inference}/generation.py | 4 +- .../meta_reference/inference}/inference.py | 10 +- .../inference}/model_parallel.py | 0 .../inference}/parallel_utils.py | 0 .../inference/quantization/fp8_impls.py | 0 .../inference/quantization/loader.py | 4 +- .../quantization/scripts/build_conda.sh | 0 .../scripts/quantize_checkpoint.py | 0 .../scripts/run_quantize_checkpoint.sh | 0 .../inference/quantization/test_fp8.py | 0 .../impls/meta_reference/memory}/__init__.py | 0 .../impls/meta_reference/memory}/config.py | 0 .../impls/meta_reference/memory}/faiss.py | 7 +- .../impls/meta_reference/safety}/__init__.py | 0 .../impls/meta_reference/safety}/config.py | 0 .../impls/meta_reference/safety}/safety.py | 2 +- .../safety}/shields/__init__.py | 0 .../meta_reference/safety}/shields/base.py | 2 +- .../safety}/shields/code_scanner.py | 2 +- .../safety/shields/contrib}/__init__.py | 0 .../shields/contrib/third_party_shield.py | 0 .../safety}/shields/llama_guard.py | 2 +- .../safety}/shields/prompt_guard.py | 2 +- .../meta_reference/telemetry}/__init__.py | 0 .../impls/meta_reference/telemetry}/config.py | 0 .../meta_reference/telemetry}/console.py | 2 +- .../registry}/__init__.py | 0 .../registry/agents.py} | 6 +- .../registry/inference.py} | 18 +-- .../registry/memory.py} | 12 +- .../registry/safety.py} | 4 +- .../registry/telemetry.py} | 6 +- .../{safety => providers/routers}/__init__.py | 0 .../routers/memory}/__init__.py | 2 +- .../routers/memory/memory.py} | 2 +- .../contrib => providers/utils}/__init__.py | 0 .../utils/agents}/__init__.py | 0 .../utils/agents}/event_logger.py | 4 +- .../agents}/execute_with_custom_tools.py | 16 +- .../utils/inference}/__init__.py | 2 - .../utils}/inference/event_logger.py | 5 +- .../utils}/inference/prepare_messages.py | 2 +- .../utils/memory}/__init__.py | 2 - .../utils/memory}/file_utils.py | 0 .../utils/memory}/vector_store.py | 2 +- .../utils/telemetry}/__init__.py | 2 - .../utils}/telemetry/tracing.py | 2 +- llama_stack/reward_scoring/api/__init__.py | 7 - llama_stack/safety/api/__init__.py | 7 - llama_stack/stack.py | 22 +-- .../synthetic_data_generation/api/__init__.py | 7 - llama_stack/telemetry/api/__init__.py | 7 - llama_stack/tools/base.py | 2 +- llama_stack/tools/builtin.py | 4 +- llama_stack/tools/custom/datatypes.py | 2 +- llama_stack/tools/safety.py | 6 +- 128 files changed, 381 insertions(+), 376 deletions(-) create mode 100644 llama_stack/apis/agents/__init__.py rename llama_stack/{agentic_system/api/api.py => apis/agents/agents.py} (68%) rename llama_stack/{agentic_system => apis/agents}/client.py (83%) create mode 100644 llama_stack/apis/batch_inference/__init__.py rename llama_stack/{batch_inference/api/api.py => apis/batch_inference/batch_inference.py} (97%) create mode 100644 llama_stack/apis/dataset/__init__.py rename llama_stack/{dataset/api/api.py => apis/dataset/dataset.py} (100%) rename llama_stack/{dataset/api => apis/evals}/__init__.py (83%) rename llama_stack/{evaluations/api/api.py => apis/evals/evals.py} (98%) create mode 100644 llama_stack/apis/inference/__init__.py rename llama_stack/{ => apis}/inference/client.py (99%) rename llama_stack/{inference/api/api.py => apis/inference/inference.py} (100%) create mode 100644 llama_stack/apis/memory/__init__.py rename llama_stack/{ => apis}/memory/client.py (99%) rename llama_stack/{memory/api/api.py => apis/memory/memory.py} (100%) create mode 100644 llama_stack/apis/models/__init__.py rename llama_stack/{models/api/endpoints.py => apis/models/models.py} (100%) create mode 100644 llama_stack/apis/post_training/__init__.py rename llama_stack/{post_training/api/api.py => apis/post_training/post_training.py} (99%) create mode 100644 llama_stack/apis/reward_scoring/__init__.py rename llama_stack/{reward_scoring/api/api.py => apis/reward_scoring/reward_scoring.py} (100%) create mode 100644 llama_stack/apis/safety/__init__.py rename llama_stack/{ => apis}/safety/client.py (98%) rename llama_stack/{safety/api/api.py => apis/safety/safety.py} (100%) create mode 100644 llama_stack/apis/synthetic_data_generation/__init__.py rename llama_stack/{synthetic_data_generation/api/api.py => apis/synthetic_data_generation/synthetic_data_generation.py} (96%) create mode 100644 llama_stack/apis/telemetry/__init__.py rename llama_stack/{telemetry/api/api.py => apis/telemetry/telemetry.py} (100%) delete mode 100644 llama_stack/inference/api/__init__.py delete mode 100644 llama_stack/memory/api/__init__.py delete mode 100644 llama_stack/post_training/api/__init__.py rename llama_stack/{agentic_system => providers}/__init__.py (100%) rename llama_stack/{batch_inference => providers/adapters}/__init__.py (100%) rename llama_stack/{ => providers/adapters}/inference/__init__.py (100%) rename llama_stack/{inference/adapters => providers/adapters/inference}/fireworks/__init__.py (100%) rename llama_stack/{inference/adapters => providers/adapters/inference}/fireworks/config.py (100%) rename llama_stack/{inference/adapters => providers/adapters/inference}/fireworks/fireworks.py (97%) rename llama_stack/{inference/adapters => providers/adapters/inference}/ollama/__init__.py (100%) rename llama_stack/{inference/adapters => providers/adapters/inference}/ollama/ollama.py (97%) rename llama_stack/{inference/adapters => providers/adapters/inference}/tgi/__init__.py (100%) rename llama_stack/{inference/adapters => providers/adapters/inference}/tgi/config.py (100%) rename llama_stack/{inference/adapters => providers/adapters/inference}/tgi/tgi.py (97%) rename llama_stack/{inference/adapters => providers/adapters/inference}/together/__init__.py (100%) rename llama_stack/{inference/adapters => providers/adapters/inference}/together/config.py (100%) rename llama_stack/{inference/adapters => providers/adapters/inference}/together/together.py (97%) rename llama_stack/{inference/adapters => providers/adapters/memory}/__init__.py (100%) rename llama_stack/{memory/adapters => providers/adapters/memory}/chroma/__init__.py (100%) rename llama_stack/{memory/adapters => providers/adapters/memory}/chroma/chroma.py (97%) rename llama_stack/{memory/adapters => providers/adapters/memory}/pgvector/__init__.py (100%) rename llama_stack/{memory/adapters => providers/adapters/memory}/pgvector/config.py (100%) rename llama_stack/{memory/adapters => providers/adapters/memory}/pgvector/pgvector.py (98%) rename llama_stack/{memory => providers/impls}/__init__.py (100%) rename llama_stack/{agentic_system/meta_reference => providers/impls/meta_reference/agents}/__init__.py (86%) rename llama_stack/{agentic_system/meta_reference => providers/impls/meta_reference/agents}/agent_instance.py (87%) rename llama_stack/{agentic_system/meta_reference/agentic_system.py => providers/impls/meta_reference/agents/agents.py} (87%) rename llama_stack/{agentic_system/meta_reference => providers/impls/meta_reference/agents}/config.py (100%) rename llama_stack/{agentic_system/meta_reference => providers/impls/meta_reference/agents}/rag/context_retriever.py (95%) rename llama_stack/{agentic_system/meta_reference => providers/impls/meta_reference/agents}/safety.py (98%) rename llama_stack/{inference/meta_reference => providers/impls/meta_reference/inference}/__init__.py (100%) rename llama_stack/{inference/meta_reference => providers/impls/meta_reference/inference}/config.py (96%) rename llama_stack/{inference/meta_reference => providers/impls/meta_reference/inference}/generation.py (99%) rename llama_stack/{inference/meta_reference => providers/impls/meta_reference/inference}/inference.py (96%) rename llama_stack/{inference/meta_reference => providers/impls/meta_reference/inference}/model_parallel.py (100%) rename llama_stack/{inference/meta_reference => providers/impls/meta_reference/inference}/parallel_utils.py (100%) rename llama_stack/{ => providers/impls/meta_reference}/inference/quantization/fp8_impls.py (100%) rename llama_stack/{ => providers/impls/meta_reference}/inference/quantization/loader.py (97%) rename llama_stack/{ => providers/impls/meta_reference}/inference/quantization/scripts/build_conda.sh (100%) rename llama_stack/{ => providers/impls/meta_reference}/inference/quantization/scripts/quantize_checkpoint.py (100%) rename llama_stack/{ => providers/impls/meta_reference}/inference/quantization/scripts/run_quantize_checkpoint.sh (100%) rename llama_stack/{ => providers/impls/meta_reference}/inference/quantization/test_fp8.py (100%) rename llama_stack/{memory/meta_reference/faiss => providers/impls/meta_reference/memory}/__init__.py (100%) rename llama_stack/{memory/meta_reference/faiss => providers/impls/meta_reference/memory}/config.py (100%) rename llama_stack/{memory/meta_reference/faiss => providers/impls/meta_reference/memory}/faiss.py (95%) rename llama_stack/{safety/meta_reference => providers/impls/meta_reference/safety}/__init__.py (100%) rename llama_stack/{safety/meta_reference => providers/impls/meta_reference/safety}/config.py (100%) rename llama_stack/{safety/meta_reference => providers/impls/meta_reference/safety}/safety.py (98%) rename llama_stack/{safety/meta_reference => providers/impls/meta_reference/safety}/shields/__init__.py (100%) rename llama_stack/{safety/meta_reference => providers/impls/meta_reference/safety}/shields/base.py (97%) rename llama_stack/{safety/meta_reference => providers/impls/meta_reference/safety}/shields/code_scanner.py (95%) rename llama_stack/{memory/common => providers/impls/meta_reference/safety/shields/contrib}/__init__.py (100%) rename llama_stack/{safety/meta_reference => providers/impls/meta_reference/safety}/shields/contrib/third_party_shield.py (100%) rename llama_stack/{safety/meta_reference => providers/impls/meta_reference/safety}/shields/llama_guard.py (99%) rename llama_stack/{safety/meta_reference => providers/impls/meta_reference/safety}/shields/prompt_guard.py (99%) rename llama_stack/{telemetry/console => providers/impls/meta_reference/telemetry}/__init__.py (100%) rename llama_stack/{telemetry/console => providers/impls/meta_reference/telemetry}/config.py (100%) rename llama_stack/{telemetry/console => providers/impls/meta_reference/telemetry}/console.py (97%) rename llama_stack/{memory/meta_reference => providers/registry}/__init__.py (100%) rename llama_stack/{agentic_system/providers.py => providers/registry/agents.py} (80%) rename llama_stack/{inference/providers.py => providers/registry/inference.py} (67%) rename llama_stack/{memory/providers.py => providers/registry/memory.py} (68%) rename llama_stack/{safety/providers.py => providers/registry/safety.py} (79%) rename llama_stack/{telemetry/providers.py => providers/registry/telemetry.py} (67%) rename llama_stack/{safety => providers/routers}/__init__.py (100%) rename llama_stack/{memory/router => providers/routers/memory}/__init__.py (91%) rename llama_stack/{memory/router/router.py => providers/routers/memory/memory.py} (98%) rename llama_stack/{safety/meta_reference/shields/contrib => providers/utils}/__init__.py (100%) rename llama_stack/{telemetry => providers/utils/agents}/__init__.py (100%) rename llama_stack/{agentic_system => providers/utils/agents}/event_logger.py (98%) rename llama_stack/{agentic_system => providers/utils/agents}/execute_with_custom_tools.py (86%) rename llama_stack/{agentic_system/api => providers/utils/inference}/__init__.py (83%) rename llama_stack/{ => providers/utils}/inference/event_logger.py (97%) rename llama_stack/{ => providers/utils}/inference/prepare_messages.py (98%) rename llama_stack/{evaluations/api => providers/utils/memory}/__init__.py (83%) rename llama_stack/{memory/common => providers/utils/memory}/file_utils.py (100%) rename llama_stack/{memory/common => providers/utils/memory}/vector_store.py (99%) rename llama_stack/{batch_inference/api => providers/utils/telemetry}/__init__.py (83%) rename llama_stack/{ => providers/utils}/telemetry/tracing.py (99%) delete mode 100644 llama_stack/reward_scoring/api/__init__.py delete mode 100644 llama_stack/safety/api/__init__.py delete mode 100644 llama_stack/synthetic_data_generation/api/__init__.py delete mode 100644 llama_stack/telemetry/api/__init__.py diff --git a/docs/cli_reference.md b/docs/cli_reference.md index e62d03117..970627c57 100644 --- a/docs/cli_reference.md +++ b/docs/cli_reference.md @@ -482,7 +482,7 @@ Once the server is setup, we can test it with a client to see the example output cd /path/to/llama-stack conda activate # any environment containing the llama-toolchain pip package will work -python -m llama_stack.inference.client localhost 5000 +python -m llama_stack.apis.inference.client localhost 5000 ``` This will run the chat completion client and query the distribution’s /inference/chat_completion API. diff --git a/docs/getting_started.md b/docs/getting_started.md index fe6b1c573..3d12ac1ae 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -296,7 +296,7 @@ Once the server is setup, we can test it with a client to see the example output cd /path/to/llama-stack conda activate # any environment containing the llama-toolchain pip package will work -python -m llama_stack.inference.client localhost 5000 +python -m llama_stack.apis.inference.client localhost 5000 ``` This will run the chat completion client and query the distribution’s /inference/chat_completion API. @@ -314,7 +314,7 @@ You know what's even more hilarious? People like you who think they can just Goo Similarly you can test safety (if you configured llama-guard and/or prompt-guard shields) by: ``` -python -m llama_stack.safety.client localhost 5000 +python -m llama_stack.apis.safety.client localhost 5000 ``` You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/sdk_examples) repo. diff --git a/llama_stack/apis/agents/__init__.py b/llama_stack/apis/agents/__init__.py new file mode 100644 index 000000000..ab203b6cd --- /dev/null +++ b/llama_stack/apis/agents/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .agents import * # noqa: F401 F403 diff --git a/llama_stack/agentic_system/api/api.py b/llama_stack/apis/agents/agents.py similarity index 68% rename from llama_stack/agentic_system/api/api.py rename to llama_stack/apis/agents/agents.py index 50920315f..5f55568b4 100644 --- a/llama_stack/agentic_system/api/api.py +++ b/llama_stack/apis/agents/agents.py @@ -15,9 +15,9 @@ from typing_extensions import Annotated from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_stack.common.deployment_types import * # noqa: F403 -from llama_stack.inference.api import * # noqa: F403 -from llama_stack.safety.api import * # noqa: F403 -from llama_stack.memory.api import * # noqa: F403 +from llama_stack.apis.inference import * # noqa: F403 +from llama_stack.apis.safety import * # noqa: F403 +from llama_stack.apis.memory import * # noqa: F403 @json_schema_type @@ -26,7 +26,7 @@ class Attachment(BaseModel): mime_type: str -class AgenticSystemTool(Enum): +class AgentTool(Enum): brave_search = "brave_search" wolfram_alpha = "wolfram_alpha" photogen = "photogen" @@ -50,41 +50,33 @@ class SearchEngineType(Enum): class SearchToolDefinition(ToolDefinitionCommon): # NOTE: brave_search is just a placeholder since model always uses # brave_search as tool call name - type: Literal[AgenticSystemTool.brave_search.value] = ( - AgenticSystemTool.brave_search.value - ) + type: Literal[AgentTool.brave_search.value] = AgentTool.brave_search.value engine: SearchEngineType = SearchEngineType.brave remote_execution: Optional[RestAPIExecutionConfig] = None @json_schema_type class WolframAlphaToolDefinition(ToolDefinitionCommon): - type: Literal[AgenticSystemTool.wolfram_alpha.value] = ( - AgenticSystemTool.wolfram_alpha.value - ) + type: Literal[AgentTool.wolfram_alpha.value] = AgentTool.wolfram_alpha.value remote_execution: Optional[RestAPIExecutionConfig] = None @json_schema_type class PhotogenToolDefinition(ToolDefinitionCommon): - type: Literal[AgenticSystemTool.photogen.value] = AgenticSystemTool.photogen.value + type: Literal[AgentTool.photogen.value] = AgentTool.photogen.value remote_execution: Optional[RestAPIExecutionConfig] = None @json_schema_type class CodeInterpreterToolDefinition(ToolDefinitionCommon): - type: Literal[AgenticSystemTool.code_interpreter.value] = ( - AgenticSystemTool.code_interpreter.value - ) + type: Literal[AgentTool.code_interpreter.value] = AgentTool.code_interpreter.value enable_inline_code_execution: bool = True remote_execution: Optional[RestAPIExecutionConfig] = None @json_schema_type class FunctionCallToolDefinition(ToolDefinitionCommon): - type: Literal[AgenticSystemTool.function_call.value] = ( - AgenticSystemTool.function_call.value - ) + type: Literal[AgentTool.function_call.value] = AgentTool.function_call.value function_name: str description: str parameters: Dict[str, ToolParamDefinition] @@ -95,30 +87,30 @@ class _MemoryBankConfigCommon(BaseModel): bank_id: str -class AgenticSystemVectorMemoryBankConfig(_MemoryBankConfigCommon): +class AgentVectorMemoryBankConfig(_MemoryBankConfigCommon): type: Literal[MemoryBankType.vector.value] = MemoryBankType.vector.value -class AgenticSystemKeyValueMemoryBankConfig(_MemoryBankConfigCommon): +class AgentKeyValueMemoryBankConfig(_MemoryBankConfigCommon): type: Literal[MemoryBankType.keyvalue.value] = MemoryBankType.keyvalue.value keys: List[str] # what keys to focus on -class AgenticSystemKeywordMemoryBankConfig(_MemoryBankConfigCommon): +class AgentKeywordMemoryBankConfig(_MemoryBankConfigCommon): type: Literal[MemoryBankType.keyword.value] = MemoryBankType.keyword.value -class AgenticSystemGraphMemoryBankConfig(_MemoryBankConfigCommon): +class AgentGraphMemoryBankConfig(_MemoryBankConfigCommon): type: Literal[MemoryBankType.graph.value] = MemoryBankType.graph.value entities: List[str] # what entities to focus on MemoryBankConfig = Annotated[ Union[ - AgenticSystemVectorMemoryBankConfig, - AgenticSystemKeyValueMemoryBankConfig, - AgenticSystemKeywordMemoryBankConfig, - AgenticSystemGraphMemoryBankConfig, + AgentVectorMemoryBankConfig, + AgentKeyValueMemoryBankConfig, + AgentKeywordMemoryBankConfig, + AgentGraphMemoryBankConfig, ], Field(discriminator="type"), ] @@ -158,7 +150,7 @@ MemoryQueryGeneratorConfig = Annotated[ class MemoryToolDefinition(ToolDefinitionCommon): - type: Literal[AgenticSystemTool.memory.value] = AgenticSystemTool.memory.value + type: Literal[AgentTool.memory.value] = AgentTool.memory.value memory_bank_configs: List[MemoryBankConfig] = Field(default_factory=list) # This config defines how a query is generated using the messages # for memory bank retrieval. @@ -169,7 +161,7 @@ class MemoryToolDefinition(ToolDefinitionCommon): max_chunks: int = 10 -AgenticSystemToolDefinition = Annotated[ +AgentToolDefinition = Annotated[ Union[ SearchToolDefinition, WolframAlphaToolDefinition, @@ -275,7 +267,7 @@ class AgentConfigCommon(BaseModel): input_shields: Optional[List[ShieldDefinition]] = Field(default_factory=list) output_shields: Optional[List[ShieldDefinition]] = Field(default_factory=list) - tools: Optional[List[AgenticSystemToolDefinition]] = Field(default_factory=list) + tools: Optional[List[AgentToolDefinition]] = Field(default_factory=list) tool_choice: Optional[ToolChoice] = Field(default=ToolChoice.auto) tool_prompt_format: Optional[ToolPromptFormat] = Field( default=ToolPromptFormat.json @@ -292,7 +284,7 @@ class AgentConfigOverridablePerTurn(AgentConfigCommon): instructions: Optional[str] = None -class AgenticSystemTurnResponseEventType(Enum): +class AgentTurnResponseEventType(Enum): step_start = "step_start" step_complete = "step_complete" step_progress = "step_progress" @@ -302,9 +294,9 @@ class AgenticSystemTurnResponseEventType(Enum): @json_schema_type -class AgenticSystemTurnResponseStepStartPayload(BaseModel): - event_type: Literal[AgenticSystemTurnResponseEventType.step_start.value] = ( - AgenticSystemTurnResponseEventType.step_start.value +class AgentTurnResponseStepStartPayload(BaseModel): + event_type: Literal[AgentTurnResponseEventType.step_start.value] = ( + AgentTurnResponseEventType.step_start.value ) step_type: StepType step_id: str @@ -312,20 +304,20 @@ class AgenticSystemTurnResponseStepStartPayload(BaseModel): @json_schema_type -class AgenticSystemTurnResponseStepCompletePayload(BaseModel): - event_type: Literal[AgenticSystemTurnResponseEventType.step_complete.value] = ( - AgenticSystemTurnResponseEventType.step_complete.value +class AgentTurnResponseStepCompletePayload(BaseModel): + event_type: Literal[AgentTurnResponseEventType.step_complete.value] = ( + AgentTurnResponseEventType.step_complete.value ) step_type: StepType step_details: Step @json_schema_type -class AgenticSystemTurnResponseStepProgressPayload(BaseModel): +class AgentTurnResponseStepProgressPayload(BaseModel): model_config = ConfigDict(protected_namespaces=()) - event_type: Literal[AgenticSystemTurnResponseEventType.step_progress.value] = ( - AgenticSystemTurnResponseEventType.step_progress.value + event_type: Literal[AgentTurnResponseEventType.step_progress.value] = ( + AgentTurnResponseEventType.step_progress.value ) step_type: StepType step_id: str @@ -336,49 +328,49 @@ class AgenticSystemTurnResponseStepProgressPayload(BaseModel): @json_schema_type -class AgenticSystemTurnResponseTurnStartPayload(BaseModel): - event_type: Literal[AgenticSystemTurnResponseEventType.turn_start.value] = ( - AgenticSystemTurnResponseEventType.turn_start.value +class AgentTurnResponseTurnStartPayload(BaseModel): + event_type: Literal[AgentTurnResponseEventType.turn_start.value] = ( + AgentTurnResponseEventType.turn_start.value ) turn_id: str @json_schema_type -class AgenticSystemTurnResponseTurnCompletePayload(BaseModel): - event_type: Literal[AgenticSystemTurnResponseEventType.turn_complete.value] = ( - AgenticSystemTurnResponseEventType.turn_complete.value +class AgentTurnResponseTurnCompletePayload(BaseModel): + event_type: Literal[AgentTurnResponseEventType.turn_complete.value] = ( + AgentTurnResponseEventType.turn_complete.value ) turn: Turn @json_schema_type -class AgenticSystemTurnResponseEvent(BaseModel): +class AgentTurnResponseEvent(BaseModel): """Streamed agent execution response.""" payload: Annotated[ Union[ - AgenticSystemTurnResponseStepStartPayload, - AgenticSystemTurnResponseStepProgressPayload, - AgenticSystemTurnResponseStepCompletePayload, - AgenticSystemTurnResponseTurnStartPayload, - AgenticSystemTurnResponseTurnCompletePayload, + AgentTurnResponseStepStartPayload, + AgentTurnResponseStepProgressPayload, + AgentTurnResponseStepCompletePayload, + AgentTurnResponseTurnStartPayload, + AgentTurnResponseTurnCompletePayload, ], Field(discriminator="event_type"), ] @json_schema_type -class AgenticSystemCreateResponse(BaseModel): +class AgentCreateResponse(BaseModel): agent_id: str @json_schema_type -class AgenticSystemSessionCreateResponse(BaseModel): +class AgentSessionCreateResponse(BaseModel): session_id: str @json_schema_type -class AgenticSystemTurnCreateRequest(AgentConfigOverridablePerTurn): +class AgentTurnCreateRequest(AgentConfigOverridablePerTurn): agent_id: str session_id: str @@ -397,24 +389,24 @@ class AgenticSystemTurnCreateRequest(AgentConfigOverridablePerTurn): @json_schema_type -class AgenticSystemTurnResponseStreamChunk(BaseModel): - event: AgenticSystemTurnResponseEvent +class AgentTurnResponseStreamChunk(BaseModel): + event: AgentTurnResponseEvent @json_schema_type -class AgenticSystemStepResponse(BaseModel): +class AgentStepResponse(BaseModel): step: Step -class AgenticSystem(Protocol): - @webmethod(route="/agentic_system/create") - async def create_agentic_system( +class Agents(Protocol): + @webmethod(route="/agents/create") + async def create_agent( self, agent_config: AgentConfig, - ) -> AgenticSystemCreateResponse: ... + ) -> AgentCreateResponse: ... - @webmethod(route="/agentic_system/turn/create") - async def create_agentic_system_turn( + @webmethod(route="/agents/turn/create") + async def create_agent_turn( self, agent_id: str, session_id: str, @@ -426,42 +418,40 @@ class AgenticSystem(Protocol): ], attachments: Optional[List[Attachment]] = None, stream: Optional[bool] = False, - ) -> AgenticSystemTurnResponseStreamChunk: ... + ) -> AgentTurnResponseStreamChunk: ... - @webmethod(route="/agentic_system/turn/get") - async def get_agentic_system_turn( + @webmethod(route="/agents/turn/get") + async def get_agents_turn( self, agent_id: str, turn_id: str, ) -> Turn: ... - @webmethod(route="/agentic_system/step/get") - async def get_agentic_system_step( + @webmethod(route="/agents/step/get") + async def get_agents_step( self, agent_id: str, turn_id: str, step_id: str - ) -> AgenticSystemStepResponse: ... + ) -> AgentStepResponse: ... - @webmethod(route="/agentic_system/session/create") - async def create_agentic_system_session( + @webmethod(route="/agents/session/create") + async def create_agent_session( self, agent_id: str, session_name: str, - ) -> AgenticSystemSessionCreateResponse: ... + ) -> AgentSessionCreateResponse: ... - @webmethod(route="/agentic_system/session/get") - async def get_agentic_system_session( + @webmethod(route="/agents/session/get") + async def get_agents_session( self, agent_id: str, session_id: str, turn_ids: Optional[List[str]] = None, ) -> Session: ... - @webmethod(route="/agentic_system/session/delete") - async def delete_agentic_system_session( - self, agent_id: str, session_id: str - ) -> None: ... + @webmethod(route="/agents/session/delete") + async def delete_agents_session(self, agent_id: str, session_id: str) -> None: ... - @webmethod(route="/agentic_system/delete") - async def delete_agentic_system( + @webmethod(route="/agents/delete") + async def delete_agents( self, agent_id: str, ) -> None: ... diff --git a/llama_stack/agentic_system/client.py b/llama_stack/apis/agents/client.py similarity index 83% rename from llama_stack/agentic_system/client.py rename to llama_stack/apis/agents/client.py index 49d887439..d02de6916 100644 --- a/llama_stack/agentic_system/client.py +++ b/llama_stack/apis/agents/client.py @@ -18,44 +18,42 @@ from termcolor import cprint from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_stack.core.datatypes import RemoteProviderConfig -from .api import * # noqa: F403 +from .agents import * # noqa: F403 from .event_logger import EventLogger async def get_client_impl(config: RemoteProviderConfig, _deps): - return AgenticSystemClient(config.url) + return AgentsClient(config.url) def encodable_dict(d: BaseModel): return json.loads(d.json()) -class AgenticSystemClient(AgenticSystem): +class AgentsClient(Agents): def __init__(self, base_url: str): self.base_url = base_url - async def create_agentic_system( - self, agent_config: AgentConfig - ) -> AgenticSystemCreateResponse: + async def create_agent(self, agent_config: AgentConfig) -> AgentCreateResponse: async with httpx.AsyncClient() as client: response = await client.post( - f"{self.base_url}/agentic_system/create", + f"{self.base_url}/agents/create", json={ "agent_config": encodable_dict(agent_config), }, headers={"Content-Type": "application/json"}, ) response.raise_for_status() - return AgenticSystemCreateResponse(**response.json()) + return AgentCreateResponse(**response.json()) - async def create_agentic_system_session( + async def create_agent_session( self, agent_id: str, session_name: str, - ) -> AgenticSystemSessionCreateResponse: + ) -> AgentSessionCreateResponse: async with httpx.AsyncClient() as client: response = await client.post( - f"{self.base_url}/agentic_system/session/create", + f"{self.base_url}/agents/session/create", json={ "agent_id": agent_id, "session_name": session_name, @@ -63,16 +61,16 @@ class AgenticSystemClient(AgenticSystem): headers={"Content-Type": "application/json"}, ) response.raise_for_status() - return AgenticSystemSessionCreateResponse(**response.json()) + return AgentSessionCreateResponse(**response.json()) - async def create_agentic_system_turn( + async def create_agent_turn( self, - request: AgenticSystemTurnCreateRequest, + request: AgentTurnCreateRequest, ) -> AsyncGenerator: async with httpx.AsyncClient() as client: async with client.stream( "POST", - f"{self.base_url}/agentic_system/turn/create", + f"{self.base_url}/agents/turn/create", json=encodable_dict(request), headers={"Content-Type": "application/json"}, timeout=20, @@ -86,7 +84,7 @@ class AgenticSystemClient(AgenticSystem): cprint(data, "red") continue - yield AgenticSystemTurnResponseStreamChunk(**jdata) + yield AgentTurnResponseStreamChunk(**jdata) except Exception as e: print(data) print(f"Error with parsing or validation: {e}") @@ -102,16 +100,16 @@ async def _run_agent(api, tool_definitions, user_prompts, attachments=None): tool_prompt_format=ToolPromptFormat.function_tag, ) - create_response = await api.create_agentic_system(agent_config) - session_response = await api.create_agentic_system_session( + create_response = await api.create_agent(agent_config) + session_response = await api.create_agent_session( agent_id=create_response.agent_id, session_name="test_session", ) for content in user_prompts: cprint(f"User> {content}", color="white", attrs=["bold"]) - iterator = api.create_agentic_system_turn( - AgenticSystemTurnCreateRequest( + iterator = api.create_agent_turn( + AgentTurnCreateRequest( agent_id=create_response.agent_id, session_id=session_response.session_id, messages=[ @@ -128,7 +126,7 @@ async def _run_agent(api, tool_definitions, user_prompts, attachments=None): async def run_main(host: str, port: int): - api = AgenticSystemClient(f"http://{host}:{port}") + api = AgentsClient(f"http://{host}:{port}") tool_definitions = [ SearchToolDefinition(engine=SearchEngineType.bing), @@ -165,7 +163,7 @@ async def run_main(host: str, port: int): async def run_rag(host: str, port: int): - api = AgenticSystemClient(f"http://{host}:{port}") + api = AgentsClient(f"http://{host}:{port}") urls = [ "memory_optimizations.rst", diff --git a/llama_stack/apis/batch_inference/__init__.py b/llama_stack/apis/batch_inference/__init__.py new file mode 100644 index 000000000..3249475ee --- /dev/null +++ b/llama_stack/apis/batch_inference/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .batch_inference import * # noqa: F401 F403 diff --git a/llama_stack/batch_inference/api/api.py b/llama_stack/apis/batch_inference/batch_inference.py similarity index 97% rename from llama_stack/batch_inference/api/api.py rename to llama_stack/apis/batch_inference/batch_inference.py index 3c8f9c18b..0c3132812 100644 --- a/llama_stack/batch_inference/api/api.py +++ b/llama_stack/apis/batch_inference/batch_inference.py @@ -11,7 +11,7 @@ from llama_models.schema_utils import json_schema_type, webmethod from pydantic import BaseModel, Field from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_stack.inference.api import * # noqa: F403 +from llama_stack.apis.inference import * # noqa: F403 @json_schema_type diff --git a/llama_stack/apis/dataset/__init__.py b/llama_stack/apis/dataset/__init__.py new file mode 100644 index 000000000..33557a0ab --- /dev/null +++ b/llama_stack/apis/dataset/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .dataset import * # noqa: F401 F403 diff --git a/llama_stack/dataset/api/api.py b/llama_stack/apis/dataset/dataset.py similarity index 100% rename from llama_stack/dataset/api/api.py rename to llama_stack/apis/dataset/dataset.py diff --git a/llama_stack/dataset/api/__init__.py b/llama_stack/apis/evals/__init__.py similarity index 83% rename from llama_stack/dataset/api/__init__.py rename to llama_stack/apis/evals/__init__.py index a7e55ba91..d21b97d0a 100644 --- a/llama_stack/dataset/api/__init__.py +++ b/llama_stack/apis/evals/__init__.py @@ -4,4 +4,4 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from .api import * # noqa: F401 F403 +from .evals import * # noqa: F401 F403 diff --git a/llama_stack/evaluations/api/api.py b/llama_stack/apis/evals/evals.py similarity index 98% rename from llama_stack/evaluations/api/api.py rename to llama_stack/apis/evals/evals.py index c99ed0c72..d17ed9fdc 100644 --- a/llama_stack/evaluations/api/api.py +++ b/llama_stack/apis/evals/evals.py @@ -12,7 +12,7 @@ from llama_models.schema_utils import webmethod from pydantic import BaseModel from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_stack.dataset.api import * # noqa: F403 +from llama_stack.apis.dataset import * # noqa: F403 from llama_stack.common.training_types import * # noqa: F403 diff --git a/llama_stack/apis/inference/__init__.py b/llama_stack/apis/inference/__init__.py new file mode 100644 index 000000000..f9f77f769 --- /dev/null +++ b/llama_stack/apis/inference/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .inference import * # noqa: F401 F403 diff --git a/llama_stack/inference/client.py b/llama_stack/apis/inference/client.py similarity index 99% rename from llama_stack/inference/client.py rename to llama_stack/apis/inference/client.py index 8e9b77030..951f4596e 100644 --- a/llama_stack/inference/client.py +++ b/llama_stack/apis/inference/client.py @@ -10,12 +10,14 @@ from typing import Any, AsyncGenerator import fire import httpx + +from llama_stack.core.datatypes import RemoteProviderConfig from pydantic import BaseModel from termcolor import cprint -from llama_stack.core.datatypes import RemoteProviderConfig +from .event_logger import EventLogger -from .api import ( +from .inference import ( ChatCompletionRequest, ChatCompletionResponse, ChatCompletionResponseStreamChunk, @@ -23,7 +25,6 @@ from .api import ( Inference, UserMessage, ) -from .event_logger import EventLogger async def get_client_impl(config: RemoteProviderConfig, _deps: Any) -> Inference: diff --git a/llama_stack/inference/api/api.py b/llama_stack/apis/inference/inference.py similarity index 100% rename from llama_stack/inference/api/api.py rename to llama_stack/apis/inference/inference.py diff --git a/llama_stack/apis/memory/__init__.py b/llama_stack/apis/memory/__init__.py new file mode 100644 index 000000000..260862228 --- /dev/null +++ b/llama_stack/apis/memory/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .memory import * # noqa: F401 F403 diff --git a/llama_stack/memory/client.py b/llama_stack/apis/memory/client.py similarity index 99% rename from llama_stack/memory/client.py rename to llama_stack/apis/memory/client.py index f629fafe8..3862fc5ac 100644 --- a/llama_stack/memory/client.py +++ b/llama_stack/apis/memory/client.py @@ -13,11 +13,11 @@ from typing import Any, Dict, List, Optional import fire import httpx -from termcolor import cprint from llama_stack.core.datatypes import RemoteProviderConfig +from termcolor import cprint -from .api import * # noqa: F403 +from .memory import * # noqa: F403 from .common.file_utils import data_url_from_file diff --git a/llama_stack/memory/api/api.py b/llama_stack/apis/memory/memory.py similarity index 100% rename from llama_stack/memory/api/api.py rename to llama_stack/apis/memory/memory.py diff --git a/llama_stack/apis/models/__init__.py b/llama_stack/apis/models/__init__.py new file mode 100644 index 000000000..410d8d1f9 --- /dev/null +++ b/llama_stack/apis/models/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .models import * # noqa: F401 F403 diff --git a/llama_stack/models/api/endpoints.py b/llama_stack/apis/models/models.py similarity index 100% rename from llama_stack/models/api/endpoints.py rename to llama_stack/apis/models/models.py diff --git a/llama_stack/apis/post_training/__init__.py b/llama_stack/apis/post_training/__init__.py new file mode 100644 index 000000000..7129c4abd --- /dev/null +++ b/llama_stack/apis/post_training/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .post_training import * # noqa: F401 F403 diff --git a/llama_stack/post_training/api/api.py b/llama_stack/apis/post_training/post_training.py similarity index 99% rename from llama_stack/post_training/api/api.py rename to llama_stack/apis/post_training/post_training.py index d2f183c2b..2e7adf320 100644 --- a/llama_stack/post_training/api/api.py +++ b/llama_stack/apis/post_training/post_training.py @@ -14,7 +14,7 @@ from llama_models.schema_utils import json_schema_type, webmethod from pydantic import BaseModel, Field from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_stack.dataset.api import * # noqa: F403 +from llama_stack.apis.dataset import * # noqa: F403 from llama_stack.common.training_types import * # noqa: F403 diff --git a/llama_stack/apis/reward_scoring/__init__.py b/llama_stack/apis/reward_scoring/__init__.py new file mode 100644 index 000000000..7ea62c241 --- /dev/null +++ b/llama_stack/apis/reward_scoring/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .reward_scoring import * # noqa: F401 F403 diff --git a/llama_stack/reward_scoring/api/api.py b/llama_stack/apis/reward_scoring/reward_scoring.py similarity index 100% rename from llama_stack/reward_scoring/api/api.py rename to llama_stack/apis/reward_scoring/reward_scoring.py diff --git a/llama_stack/apis/safety/__init__.py b/llama_stack/apis/safety/__init__.py new file mode 100644 index 000000000..dc3fe90b4 --- /dev/null +++ b/llama_stack/apis/safety/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .safety import * # noqa: F401 F403 diff --git a/llama_stack/safety/client.py b/llama_stack/apis/safety/client.py similarity index 98% rename from llama_stack/safety/client.py rename to llama_stack/apis/safety/client.py index 5ae59c66f..d09961a72 100644 --- a/llama_stack/safety/client.py +++ b/llama_stack/apis/safety/client.py @@ -13,12 +13,12 @@ import fire import httpx from llama_models.llama3.api.datatypes import UserMessage + +from llama_stack.core.datatypes import RemoteProviderConfig from pydantic import BaseModel from termcolor import cprint -from llama_stack.core.datatypes import RemoteProviderConfig - -from .api import * # noqa: F403 +from .safety import * # noqa: F403 async def get_client_impl(config: RemoteProviderConfig, _deps: Any) -> Safety: diff --git a/llama_stack/safety/api/api.py b/llama_stack/apis/safety/safety.py similarity index 100% rename from llama_stack/safety/api/api.py rename to llama_stack/apis/safety/safety.py diff --git a/llama_stack/apis/synthetic_data_generation/__init__.py b/llama_stack/apis/synthetic_data_generation/__init__.py new file mode 100644 index 000000000..cfdec76ce --- /dev/null +++ b/llama_stack/apis/synthetic_data_generation/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .synthetic_data_generation import * # noqa: F401 F403 diff --git a/llama_stack/synthetic_data_generation/api/api.py b/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py similarity index 96% rename from llama_stack/synthetic_data_generation/api/api.py rename to llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py index f6059fc46..60c756128 100644 --- a/llama_stack/synthetic_data_generation/api/api.py +++ b/llama_stack/apis/synthetic_data_generation/synthetic_data_generation.py @@ -13,7 +13,7 @@ from llama_models.schema_utils import json_schema_type, webmethod from pydantic import BaseModel from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_stack.reward_scoring.api import * # noqa: F403 +from llama_stack.apis.reward_scoring import * # noqa: F403 class FilteringFunction(Enum): diff --git a/llama_stack/apis/telemetry/__init__.py b/llama_stack/apis/telemetry/__init__.py new file mode 100644 index 000000000..6a111dc9e --- /dev/null +++ b/llama_stack/apis/telemetry/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .telemetry import * # noqa: F401 F403 diff --git a/llama_stack/telemetry/api/api.py b/llama_stack/apis/telemetry/telemetry.py similarity index 100% rename from llama_stack/telemetry/api/api.py rename to llama_stack/apis/telemetry/telemetry.py diff --git a/llama_stack/cli/llama.py b/llama_stack/cli/llama.py index 9a5530c0c..8ca82db81 100644 --- a/llama_stack/cli/llama.py +++ b/llama_stack/cli/llama.py @@ -31,16 +31,6 @@ class LlamaCLIParser: ModelParser.create(subparsers) StackParser.create(subparsers) - # Import sub-commands from agentic_system if they exist - try: - from llama_agentic_system.cli.subcommand_modules import SUBCOMMAND_MODULES - - for module in SUBCOMMAND_MODULES: - module.create(subparsers) - - except ImportError: - pass - def parse_args(self) -> argparse.Namespace: return self.parser.parse_args() diff --git a/llama_stack/configs/distributions/conda/local-conda-example-build.yaml b/llama_stack/configs/distributions/conda/local-conda-example-build.yaml index d588b6b5f..143335f51 100644 --- a/llama_stack/configs/distributions/conda/local-conda-example-build.yaml +++ b/llama_stack/configs/distributions/conda/local-conda-example-build.yaml @@ -5,6 +5,6 @@ distribution_spec: inference: meta-reference memory: meta-reference-faiss safety: meta-reference - agentic_system: meta-reference + agents: meta-reference telemetry: console image_type: conda diff --git a/llama_stack/configs/distributions/conda/local-fireworks-conda-example-build.yaml b/llama_stack/configs/distributions/conda/local-fireworks-conda-example-build.yaml index c3b38aebe..157b6d011 100644 --- a/llama_stack/configs/distributions/conda/local-fireworks-conda-example-build.yaml +++ b/llama_stack/configs/distributions/conda/local-fireworks-conda-example-build.yaml @@ -5,6 +5,6 @@ distribution_spec: inference: remote::fireworks memory: meta-reference-faiss safety: meta-reference - agentic_system: meta-reference + agents: meta-reference telemetry: console image_type: conda diff --git a/llama_stack/configs/distributions/conda/local-ollama-conda-example-build.yaml b/llama_stack/configs/distributions/conda/local-ollama-conda-example-build.yaml index 31bc9d0e9..ad4966aa9 100644 --- a/llama_stack/configs/distributions/conda/local-ollama-conda-example-build.yaml +++ b/llama_stack/configs/distributions/conda/local-ollama-conda-example-build.yaml @@ -5,6 +5,6 @@ distribution_spec: inference: remote::ollama memory: meta-reference-faiss safety: meta-reference - agentic_system: meta-reference + agents: meta-reference telemetry: console image_type: conda diff --git a/llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml b/llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml index 1ac6f44ba..3439682ed 100644 --- a/llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml +++ b/llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml @@ -5,6 +5,6 @@ distribution_spec: inference: remote::tgi memory: meta-reference-faiss safety: meta-reference - agentic_system: meta-reference + agents: meta-reference telemetry: console image_type: conda diff --git a/llama_stack/configs/distributions/conda/local-together-conda-example-build.yaml b/llama_stack/configs/distributions/conda/local-together-conda-example-build.yaml index 4aa13fed5..df6d8b3bf 100644 --- a/llama_stack/configs/distributions/conda/local-together-conda-example-build.yaml +++ b/llama_stack/configs/distributions/conda/local-together-conda-example-build.yaml @@ -5,6 +5,6 @@ distribution_spec: inference: remote::together memory: meta-reference-faiss safety: meta-reference - agentic_system: meta-reference + agents: meta-reference telemetry: console image_type: conda diff --git a/llama_stack/configs/distributions/docker/local-docker-example-build.yaml b/llama_stack/configs/distributions/docker/local-docker-example-build.yaml index f868aa98f..b94203d49 100644 --- a/llama_stack/configs/distributions/docker/local-docker-example-build.yaml +++ b/llama_stack/configs/distributions/docker/local-docker-example-build.yaml @@ -5,6 +5,6 @@ distribution_spec: inference: meta-reference memory: meta-reference-faiss safety: meta-reference - agentic_system: meta-reference + agents: meta-reference telemetry: console image_type: docker diff --git a/llama_stack/core/datatypes.py b/llama_stack/core/datatypes.py index 17c848d3f..a996d5ca9 100644 --- a/llama_stack/core/datatypes.py +++ b/llama_stack/core/datatypes.py @@ -17,7 +17,7 @@ from pydantic import BaseModel, Field, validator class Api(Enum): inference = "inference" safety = "safety" - agentic_system = "agentic_system" + agents = "agents" memory = "memory" telemetry = "telemetry" diff --git a/llama_stack/core/distribution.py b/llama_stack/core/distribution.py index 83ac4f045..affcf175f 100644 --- a/llama_stack/core/distribution.py +++ b/llama_stack/core/distribution.py @@ -8,11 +8,11 @@ import importlib import inspect from typing import Dict, List -from llama_stack.agentic_system.api import AgenticSystem -from llama_stack.inference.api import Inference -from llama_stack.memory.api import Memory -from llama_stack.safety.api import Safety -from llama_stack.telemetry.api import Telemetry +from llama_stack.apis.agents import Agents +from llama_stack.apis.inference import Inference +from llama_stack.apis.memory import Memory +from llama_stack.apis.safety import Safety +from llama_stack.apis.telemetry import Telemetry from .datatypes import Api, ApiEndpoint, ProviderSpec, remote_provider_spec @@ -34,7 +34,7 @@ def api_endpoints() -> Dict[Api, List[ApiEndpoint]]: protocols = { Api.inference: Inference, Api.safety: Safety, - Api.agentic_system: AgenticSystem, + Api.agents: Agents, Api.memory: Memory, Api.telemetry: Telemetry, } @@ -67,7 +67,7 @@ def api_providers() -> Dict[Api, Dict[str, ProviderSpec]]: ret = {} for api in stack_apis(): name = api.name.lower() - module = importlib.import_module(f"llama_stack.{name}.providers") + module = importlib.import_module(f"llama_stack.providers.registry.{name}") ret[api] = { "remote": remote_provider_spec(api), **{a.provider_id: a for a in module.available_providers()}, diff --git a/llama_stack/core/server.py b/llama_stack/core/server.py index 7a40184ec..914a663a5 100644 --- a/llama_stack/core/server.py +++ b/llama_stack/core/server.py @@ -39,7 +39,7 @@ from pydantic import BaseModel, ValidationError from termcolor import cprint from typing_extensions import Annotated -from llama_stack.telemetry.tracing import ( +from llama_stack.providers.utils.telemetry.tracing import ( end_trace, setup_logger, SpanStatus, diff --git a/llama_stack/inference/api/__init__.py b/llama_stack/inference/api/__init__.py deleted file mode 100644 index a7e55ba91..000000000 --- a/llama_stack/inference/api/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .api import * # noqa: F401 F403 diff --git a/llama_stack/memory/api/__init__.py b/llama_stack/memory/api/__init__.py deleted file mode 100644 index a7e55ba91..000000000 --- a/llama_stack/memory/api/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .api import * # noqa: F401 F403 diff --git a/llama_stack/post_training/api/__init__.py b/llama_stack/post_training/api/__init__.py deleted file mode 100644 index a7e55ba91..000000000 --- a/llama_stack/post_training/api/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .api import * # noqa: F401 F403 diff --git a/llama_stack/agentic_system/__init__.py b/llama_stack/providers/__init__.py similarity index 100% rename from llama_stack/agentic_system/__init__.py rename to llama_stack/providers/__init__.py diff --git a/llama_stack/batch_inference/__init__.py b/llama_stack/providers/adapters/__init__.py similarity index 100% rename from llama_stack/batch_inference/__init__.py rename to llama_stack/providers/adapters/__init__.py diff --git a/llama_stack/inference/__init__.py b/llama_stack/providers/adapters/inference/__init__.py similarity index 100% rename from llama_stack/inference/__init__.py rename to llama_stack/providers/adapters/inference/__init__.py diff --git a/llama_stack/inference/adapters/fireworks/__init__.py b/llama_stack/providers/adapters/inference/fireworks/__init__.py similarity index 100% rename from llama_stack/inference/adapters/fireworks/__init__.py rename to llama_stack/providers/adapters/inference/fireworks/__init__.py diff --git a/llama_stack/inference/adapters/fireworks/config.py b/llama_stack/providers/adapters/inference/fireworks/config.py similarity index 100% rename from llama_stack/inference/adapters/fireworks/config.py rename to llama_stack/providers/adapters/inference/fireworks/config.py diff --git a/llama_stack/inference/adapters/fireworks/fireworks.py b/llama_stack/providers/adapters/inference/fireworks/fireworks.py similarity index 97% rename from llama_stack/inference/adapters/fireworks/fireworks.py rename to llama_stack/providers/adapters/inference/fireworks/fireworks.py index 6ebb8d5a3..1e6f2e753 100644 --- a/llama_stack/inference/adapters/fireworks/fireworks.py +++ b/llama_stack/providers/adapters/inference/fireworks/fireworks.py @@ -6,15 +6,16 @@ from typing import AsyncGenerator -from fireworks.client import Fireworks from llama_models.llama3.api.chat_format import ChatFormat from llama_models.llama3.api.datatypes import Message, StopReason from llama_models.llama3.api.tokenizer import Tokenizer from llama_models.sku_list import resolve_model -from llama_stack.inference.api import * # noqa: F403 -from llama_stack.inference.prepare_messages import prepare_messages +from fireworks.client import Fireworks + +from llama_stack.apis.inference import * # noqa: F403 +from llama_stack.providers.utils.inference.prepare_messages import prepare_messages from .config import FireworksImplConfig @@ -81,7 +82,7 @@ class FireworksInferenceAdapter(Inference): model: str, messages: List[Message], sampling_params: Optional[SamplingParams] = SamplingParams(), - tools: Optional[List[ToolDefinition]] = list(), + tools: Optional[List[ToolDefinition]] = None, tool_choice: Optional[ToolChoice] = ToolChoice.auto, tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json, stream: Optional[bool] = False, @@ -91,7 +92,7 @@ class FireworksInferenceAdapter(Inference): model=model, messages=messages, sampling_params=sampling_params, - tools=tools, + tools=tools or [], tool_choice=tool_choice, tool_prompt_format=tool_prompt_format, stream=stream, diff --git a/llama_stack/inference/adapters/ollama/__init__.py b/llama_stack/providers/adapters/inference/ollama/__init__.py similarity index 100% rename from llama_stack/inference/adapters/ollama/__init__.py rename to llama_stack/providers/adapters/inference/ollama/__init__.py diff --git a/llama_stack/inference/adapters/ollama/ollama.py b/llama_stack/providers/adapters/inference/ollama/ollama.py similarity index 97% rename from llama_stack/inference/adapters/ollama/ollama.py rename to llama_stack/providers/adapters/inference/ollama/ollama.py index 236421a2f..ea726ff75 100644 --- a/llama_stack/inference/adapters/ollama/ollama.py +++ b/llama_stack/providers/adapters/inference/ollama/ollama.py @@ -12,10 +12,11 @@ from llama_models.llama3.api.chat_format import ChatFormat from llama_models.llama3.api.datatypes import Message, StopReason from llama_models.llama3.api.tokenizer import Tokenizer from llama_models.sku_list import resolve_model + from ollama import AsyncClient -from llama_stack.inference.api import * # noqa: F403 -from llama_stack.inference.prepare_messages import prepare_messages +from llama_stack.apis.inference import * # noqa: F403 +from llama_stack.providers.utils.inference.prepare_messages import prepare_messages # TODO: Eventually this will move to the llama cli model list command # mapping of Model SKUs to ollama models @@ -89,7 +90,7 @@ class OllamaInferenceAdapter(Inference): model: str, messages: List[Message], sampling_params: Optional[SamplingParams] = SamplingParams(), - tools: Optional[List[ToolDefinition]] = list(), + tools: Optional[List[ToolDefinition]] = None, tool_choice: Optional[ToolChoice] = ToolChoice.auto, tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json, stream: Optional[bool] = False, @@ -99,7 +100,7 @@ class OllamaInferenceAdapter(Inference): model=model, messages=messages, sampling_params=sampling_params, - tools=tools, + tools=tools or [], tool_choice=tool_choice, tool_prompt_format=tool_prompt_format, stream=stream, diff --git a/llama_stack/inference/adapters/tgi/__init__.py b/llama_stack/providers/adapters/inference/tgi/__init__.py similarity index 100% rename from llama_stack/inference/adapters/tgi/__init__.py rename to llama_stack/providers/adapters/inference/tgi/__init__.py diff --git a/llama_stack/inference/adapters/tgi/config.py b/llama_stack/providers/adapters/inference/tgi/config.py similarity index 100% rename from llama_stack/inference/adapters/tgi/config.py rename to llama_stack/providers/adapters/inference/tgi/config.py diff --git a/llama_stack/inference/adapters/tgi/tgi.py b/llama_stack/providers/adapters/inference/tgi/tgi.py similarity index 97% rename from llama_stack/inference/adapters/tgi/tgi.py rename to llama_stack/providers/adapters/inference/tgi/tgi.py index 7ca2e1b14..3be1f3e98 100644 --- a/llama_stack/inference/adapters/tgi/tgi.py +++ b/llama_stack/providers/adapters/inference/tgi/tgi.py @@ -13,8 +13,8 @@ from huggingface_hub import HfApi, InferenceClient from llama_models.llama3.api.chat_format import ChatFormat from llama_models.llama3.api.datatypes import StopReason from llama_models.llama3.api.tokenizer import Tokenizer -from llama_stack.inference.api import * # noqa: F403 -from llama_stack.inference.prepare_messages import prepare_messages +from llama_stack.apis.inference import * # noqa: F403 +from llama_stack.providers.utils.inference.prepare_messages import prepare_messages from .config import TGIImplConfig @@ -87,7 +87,7 @@ class TGIAdapter(Inference): model: str, messages: List[Message], sampling_params: Optional[SamplingParams] = SamplingParams(), - tools: Optional[List[ToolDefinition]] = list(), + tools: Optional[List[ToolDefinition]] = None, tool_choice: Optional[ToolChoice] = ToolChoice.auto, tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json, stream: Optional[bool] = False, @@ -97,7 +97,7 @@ class TGIAdapter(Inference): model=model, messages=messages, sampling_params=sampling_params, - tools=tools, + tools=tools or [], tool_choice=tool_choice, tool_prompt_format=tool_prompt_format, stream=stream, diff --git a/llama_stack/inference/adapters/together/__init__.py b/llama_stack/providers/adapters/inference/together/__init__.py similarity index 100% rename from llama_stack/inference/adapters/together/__init__.py rename to llama_stack/providers/adapters/inference/together/__init__.py diff --git a/llama_stack/inference/adapters/together/config.py b/llama_stack/providers/adapters/inference/together/config.py similarity index 100% rename from llama_stack/inference/adapters/together/config.py rename to llama_stack/providers/adapters/inference/together/config.py diff --git a/llama_stack/inference/adapters/together/together.py b/llama_stack/providers/adapters/inference/together/together.py similarity index 97% rename from llama_stack/inference/adapters/together/together.py rename to llama_stack/providers/adapters/inference/together/together.py index 739b437ca..565130883 100644 --- a/llama_stack/inference/adapters/together/together.py +++ b/llama_stack/providers/adapters/inference/together/together.py @@ -11,10 +11,11 @@ from llama_models.llama3.api.chat_format import ChatFormat from llama_models.llama3.api.datatypes import Message, StopReason from llama_models.llama3.api.tokenizer import Tokenizer from llama_models.sku_list import resolve_model + from together import Together -from llama_stack.inference.api import * # noqa: F403 -from llama_stack.inference.prepare_messages import prepare_messages +from llama_stack.apis.inference import * # noqa: F403 +from llama_stack.providers.utils.inference.prepare_messages import prepare_messages from .config import TogetherImplConfig @@ -81,7 +82,7 @@ class TogetherInferenceAdapter(Inference): model: str, messages: List[Message], sampling_params: Optional[SamplingParams] = SamplingParams(), - tools: Optional[List[ToolDefinition]] = list(), + tools: Optional[List[ToolDefinition]] = None, tool_choice: Optional[ToolChoice] = ToolChoice.auto, tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json, stream: Optional[bool] = False, @@ -92,7 +93,7 @@ class TogetherInferenceAdapter(Inference): model=model, messages=messages, sampling_params=sampling_params, - tools=tools, + tools=tools or [], tool_choice=tool_choice, tool_prompt_format=tool_prompt_format, stream=stream, diff --git a/llama_stack/inference/adapters/__init__.py b/llama_stack/providers/adapters/memory/__init__.py similarity index 100% rename from llama_stack/inference/adapters/__init__.py rename to llama_stack/providers/adapters/memory/__init__.py diff --git a/llama_stack/memory/adapters/chroma/__init__.py b/llama_stack/providers/adapters/memory/chroma/__init__.py similarity index 100% rename from llama_stack/memory/adapters/chroma/__init__.py rename to llama_stack/providers/adapters/memory/chroma/__init__.py diff --git a/llama_stack/memory/adapters/chroma/chroma.py b/llama_stack/providers/adapters/memory/chroma/chroma.py similarity index 97% rename from llama_stack/memory/adapters/chroma/chroma.py rename to llama_stack/providers/adapters/memory/chroma/chroma.py index 0c39fd9c0..15f5810a9 100644 --- a/llama_stack/memory/adapters/chroma/chroma.py +++ b/llama_stack/providers/adapters/memory/chroma/chroma.py @@ -12,10 +12,13 @@ from urllib.parse import urlparse import chromadb from numpy.typing import NDArray -from llama_stack.memory.api import * # noqa: F403 +from llama_stack.apis.memory import * # noqa: F403 -from llama_stack.memory.common.vector_store import BankWithIndex, EmbeddingIndex +from llama_stack.providers.utils.memory.vector_store import ( + BankWithIndex, + EmbeddingIndex, +) class ChromaIndex(EmbeddingIndex): diff --git a/llama_stack/memory/adapters/pgvector/__init__.py b/llama_stack/providers/adapters/memory/pgvector/__init__.py similarity index 100% rename from llama_stack/memory/adapters/pgvector/__init__.py rename to llama_stack/providers/adapters/memory/pgvector/__init__.py diff --git a/llama_stack/memory/adapters/pgvector/config.py b/llama_stack/providers/adapters/memory/pgvector/config.py similarity index 100% rename from llama_stack/memory/adapters/pgvector/config.py rename to llama_stack/providers/adapters/memory/pgvector/config.py diff --git a/llama_stack/memory/adapters/pgvector/pgvector.py b/llama_stack/providers/adapters/memory/pgvector/pgvector.py similarity index 98% rename from llama_stack/memory/adapters/pgvector/pgvector.py rename to llama_stack/providers/adapters/memory/pgvector/pgvector.py index a8dadb0f3..a5c84a1b2 100644 --- a/llama_stack/memory/adapters/pgvector/pgvector.py +++ b/llama_stack/providers/adapters/memory/pgvector/pgvector.py @@ -13,10 +13,10 @@ from numpy.typing import NDArray from psycopg2 import sql from psycopg2.extras import execute_values, Json from pydantic import BaseModel -from llama_stack.memory.api import * # noqa: F403 +from llama_stack.apis.memory import * # noqa: F403 -from llama_stack.memory.common.vector_store import ( +from llama_stack.providers.utils.memory.vector_store import ( ALL_MINILM_L6_V2_DIMENSION, BankWithIndex, EmbeddingIndex, diff --git a/llama_stack/memory/__init__.py b/llama_stack/providers/impls/__init__.py similarity index 100% rename from llama_stack/memory/__init__.py rename to llama_stack/providers/impls/__init__.py diff --git a/llama_stack/agentic_system/meta_reference/__init__.py b/llama_stack/providers/impls/meta_reference/agents/__init__.py similarity index 86% rename from llama_stack/agentic_system/meta_reference/__init__.py rename to llama_stack/providers/impls/meta_reference/agents/__init__.py index 686708c0c..d18cd1138 100644 --- a/llama_stack/agentic_system/meta_reference/__init__.py +++ b/llama_stack/providers/impls/meta_reference/agents/__init__.py @@ -14,13 +14,13 @@ from .config import MetaReferenceImplConfig async def get_provider_impl( config: MetaReferenceImplConfig, deps: Dict[Api, ProviderSpec] ): - from .agentic_system import MetaReferenceAgenticSystemImpl + from .agents import MetaReferenceAgentsImpl assert isinstance( config, MetaReferenceImplConfig ), f"Unexpected config type: {type(config)}" - impl = MetaReferenceAgenticSystemImpl( + impl = MetaReferenceAgentsImpl( config, deps[Api.inference], deps[Api.memory], diff --git a/llama_stack/agentic_system/meta_reference/agent_instance.py b/llama_stack/providers/impls/meta_reference/agents/agent_instance.py similarity index 87% rename from llama_stack/agentic_system/meta_reference/agent_instance.py rename to llama_stack/providers/impls/meta_reference/agents/agent_instance.py index 141174cce..a8b826972 100644 --- a/llama_stack/agentic_system/meta_reference/agent_instance.py +++ b/llama_stack/providers/impls/meta_reference/agents/agent_instance.py @@ -20,10 +20,10 @@ import httpx from termcolor import cprint -from llama_stack.agentic_system.api import * # noqa: F403 -from llama_stack.inference.api import * # noqa: F403 -from llama_stack.memory.api import * # noqa: F403 -from llama_stack.safety.api import * # noqa: F403 +from llama_stack.apis.agents import * # noqa: F403 +from llama_stack.apis.inference import * # noqa: F403 +from llama_stack.apis.memory import * # noqa: F403 +from llama_stack.apis.safety import * # noqa: F403 from llama_stack.tools.base import BaseTool from llama_stack.tools.builtin import ( @@ -122,7 +122,7 @@ class ChatAgent(ShieldRunnerMixin): return session async def create_and_execute_turn( - self, request: AgenticSystemTurnCreateRequest + self, request: AgentTurnCreateRequest ) -> AsyncGenerator: assert ( request.session_id in self.sessions @@ -141,9 +141,9 @@ class ChatAgent(ShieldRunnerMixin): turn_id = str(uuid.uuid4()) start_time = datetime.now() - yield AgenticSystemTurnResponseStreamChunk( - event=AgenticSystemTurnResponseEvent( - payload=AgenticSystemTurnResponseTurnStartPayload( + yield AgentTurnResponseStreamChunk( + event=AgentTurnResponseEvent( + payload=AgentTurnResponseTurnStartPayload( turn_id=turn_id, ) ) @@ -169,12 +169,12 @@ class ChatAgent(ShieldRunnerMixin): continue assert isinstance( - chunk, AgenticSystemTurnResponseStreamChunk + chunk, AgentTurnResponseStreamChunk ), f"Unexpected type {type(chunk)}" event = chunk.event if ( event.payload.event_type - == AgenticSystemTurnResponseEventType.step_complete.value + == AgentTurnResponseEventType.step_complete.value ): steps.append(event.payload.step_details) @@ -193,9 +193,9 @@ class ChatAgent(ShieldRunnerMixin): ) session.turns.append(turn) - chunk = AgenticSystemTurnResponseStreamChunk( - event=AgenticSystemTurnResponseEvent( - payload=AgenticSystemTurnResponseTurnCompletePayload( + chunk = AgentTurnResponseStreamChunk( + event=AgentTurnResponseEvent( + payload=AgentTurnResponseTurnCompletePayload( turn=turn, ) ) @@ -261,9 +261,9 @@ class ChatAgent(ShieldRunnerMixin): step_id = str(uuid.uuid4()) try: - yield AgenticSystemTurnResponseStreamChunk( - event=AgenticSystemTurnResponseEvent( - payload=AgenticSystemTurnResponseStepStartPayload( + yield AgentTurnResponseStreamChunk( + event=AgentTurnResponseEvent( + payload=AgentTurnResponseStepStartPayload( step_type=StepType.shield_call.value, step_id=step_id, metadata=dict(touchpoint=touchpoint), @@ -273,9 +273,9 @@ class ChatAgent(ShieldRunnerMixin): await self.run_shields(messages, shields) except SafetyException as e: - yield AgenticSystemTurnResponseStreamChunk( - event=AgenticSystemTurnResponseEvent( - payload=AgenticSystemTurnResponseStepCompletePayload( + yield AgentTurnResponseStreamChunk( + event=AgentTurnResponseEvent( + payload=AgentTurnResponseStepCompletePayload( step_type=StepType.shield_call.value, step_details=ShieldCallStep( step_id=step_id, @@ -292,9 +292,9 @@ class ChatAgent(ShieldRunnerMixin): ) yield False - yield AgenticSystemTurnResponseStreamChunk( - event=AgenticSystemTurnResponseEvent( - payload=AgenticSystemTurnResponseStepCompletePayload( + yield AgentTurnResponseStreamChunk( + event=AgentTurnResponseEvent( + payload=AgentTurnResponseStepCompletePayload( step_type=StepType.shield_call.value, step_details=ShieldCallStep( step_id=step_id, @@ -325,9 +325,9 @@ class ChatAgent(ShieldRunnerMixin): ) if need_rag_context: step_id = str(uuid.uuid4()) - yield AgenticSystemTurnResponseStreamChunk( - event=AgenticSystemTurnResponseEvent( - payload=AgenticSystemTurnResponseStepStartPayload( + yield AgentTurnResponseStreamChunk( + event=AgentTurnResponseEvent( + payload=AgentTurnResponseStepStartPayload( step_type=StepType.memory_retrieval.value, step_id=step_id, ) @@ -341,9 +341,9 @@ class ChatAgent(ShieldRunnerMixin): ) step_id = str(uuid.uuid4()) - yield AgenticSystemTurnResponseStreamChunk( - event=AgenticSystemTurnResponseEvent( - payload=AgenticSystemTurnResponseStepCompletePayload( + yield AgentTurnResponseStreamChunk( + event=AgentTurnResponseEvent( + payload=AgentTurnResponseStepCompletePayload( step_type=StepType.memory_retrieval.value, step_id=step_id, step_details=MemoryRetrievalStep( @@ -360,7 +360,7 @@ class ChatAgent(ShieldRunnerMixin): last_message = input_messages[-1] last_message.context = "\n".join(rag_context) - elif attachments and AgenticSystemTool.code_interpreter.value in enabled_tools: + elif attachments and AgentTool.code_interpreter.value in enabled_tools: urls = [a.content for a in attachments if isinstance(a.content, URL)] msg = await attachment_message(self.tempdir, urls) input_messages.append(msg) @@ -379,9 +379,9 @@ class ChatAgent(ShieldRunnerMixin): cprint(f"{str(msg)}", color=color) step_id = str(uuid.uuid4()) - yield AgenticSystemTurnResponseStreamChunk( - event=AgenticSystemTurnResponseEvent( - payload=AgenticSystemTurnResponseStepStartPayload( + yield AgentTurnResponseStreamChunk( + event=AgentTurnResponseEvent( + payload=AgentTurnResponseStepStartPayload( step_type=StepType.inference.value, step_id=step_id, ) @@ -412,9 +412,9 @@ class ChatAgent(ShieldRunnerMixin): tool_calls.append(delta.content) if stream: - yield AgenticSystemTurnResponseStreamChunk( - event=AgenticSystemTurnResponseEvent( - payload=AgenticSystemTurnResponseStepProgressPayload( + yield AgentTurnResponseStreamChunk( + event=AgentTurnResponseEvent( + payload=AgentTurnResponseStepProgressPayload( step_type=StepType.inference.value, step_id=step_id, model_response_text_delta="", @@ -426,9 +426,9 @@ class ChatAgent(ShieldRunnerMixin): elif isinstance(delta, str): content += delta if stream and event.stop_reason is None: - yield AgenticSystemTurnResponseStreamChunk( - event=AgenticSystemTurnResponseEvent( - payload=AgenticSystemTurnResponseStepProgressPayload( + yield AgentTurnResponseStreamChunk( + event=AgentTurnResponseEvent( + payload=AgentTurnResponseStepProgressPayload( step_type=StepType.inference.value, step_id=step_id, model_response_text_delta=event.delta, @@ -448,9 +448,9 @@ class ChatAgent(ShieldRunnerMixin): tool_calls=tool_calls, ) - yield AgenticSystemTurnResponseStreamChunk( - event=AgenticSystemTurnResponseEvent( - payload=AgenticSystemTurnResponseStepCompletePayload( + yield AgentTurnResponseStreamChunk( + event=AgentTurnResponseEvent( + payload=AgentTurnResponseStepCompletePayload( step_type=StepType.inference.value, step_id=step_id, step_details=InferenceStep( @@ -498,17 +498,17 @@ class ChatAgent(ShieldRunnerMixin): return step_id = str(uuid.uuid4()) - yield AgenticSystemTurnResponseStreamChunk( - event=AgenticSystemTurnResponseEvent( - payload=AgenticSystemTurnResponseStepStartPayload( + yield AgentTurnResponseStreamChunk( + event=AgentTurnResponseEvent( + payload=AgentTurnResponseStepStartPayload( step_type=StepType.tool_execution.value, step_id=step_id, ) ) ) - yield AgenticSystemTurnResponseStreamChunk( - event=AgenticSystemTurnResponseEvent( - payload=AgenticSystemTurnResponseStepProgressPayload( + yield AgentTurnResponseStreamChunk( + event=AgentTurnResponseEvent( + payload=AgentTurnResponseStepProgressPayload( step_type=StepType.tool_execution.value, step_id=step_id, tool_call=tool_call, @@ -525,9 +525,9 @@ class ChatAgent(ShieldRunnerMixin): ), "Currently not supporting multiple messages" result_message = result_messages[0] - yield AgenticSystemTurnResponseStreamChunk( - event=AgenticSystemTurnResponseEvent( - payload=AgenticSystemTurnResponseStepCompletePayload( + yield AgentTurnResponseStreamChunk( + event=AgentTurnResponseEvent( + payload=AgentTurnResponseStepCompletePayload( step_type=StepType.tool_execution.value, step_details=ToolExecutionStep( step_id=step_id, @@ -547,9 +547,9 @@ class ChatAgent(ShieldRunnerMixin): # TODO: add tool-input touchpoint and a "start" event for this step also # but that needs a lot more refactoring of Tool code potentially - yield AgenticSystemTurnResponseStreamChunk( - event=AgenticSystemTurnResponseEvent( - payload=AgenticSystemTurnResponseStepCompletePayload( + yield AgentTurnResponseStreamChunk( + event=AgentTurnResponseEvent( + payload=AgentTurnResponseStepCompletePayload( step_type=StepType.shield_call.value, step_details=ShieldCallStep( step_id=str(uuid.uuid4()), @@ -566,9 +566,9 @@ class ChatAgent(ShieldRunnerMixin): ) except SafetyException as e: - yield AgenticSystemTurnResponseStreamChunk( - event=AgenticSystemTurnResponseEvent( - payload=AgenticSystemTurnResponseStepCompletePayload( + yield AgentTurnResponseStreamChunk( + event=AgentTurnResponseEvent( + payload=AgentTurnResponseStepCompletePayload( step_type=StepType.shield_call.value, step_details=ShieldCallStep( step_id=str(uuid.uuid4()), @@ -616,18 +616,18 @@ class ChatAgent(ShieldRunnerMixin): enabled_tools = set(t.type for t in self.agent_config.tools) if attachments: if ( - AgenticSystemTool.code_interpreter.value in enabled_tools + AgentTool.code_interpreter.value in enabled_tools and self.agent_config.tool_choice == ToolChoice.required ): return False else: return True - return AgenticSystemTool.memory.value in enabled_tools + return AgentTool.memory.value in enabled_tools def _memory_tool_definition(self) -> Optional[MemoryToolDefinition]: for t in self.agent_config.tools: - if t.type == AgenticSystemTool.memory.value: + if t.type == AgentTool.memory.value: return t return None diff --git a/llama_stack/agentic_system/meta_reference/agentic_system.py b/llama_stack/providers/impls/meta_reference/agents/agents.py similarity index 87% rename from llama_stack/agentic_system/meta_reference/agentic_system.py rename to llama_stack/providers/impls/meta_reference/agents/agents.py index 7f624cfa6..d77a31bb0 100644 --- a/llama_stack/agentic_system/meta_reference/agentic_system.py +++ b/llama_stack/providers/impls/meta_reference/agents/agents.py @@ -10,10 +10,10 @@ import tempfile import uuid from typing import AsyncGenerator -from llama_stack.inference.api import Inference -from llama_stack.memory.api import Memory -from llama_stack.safety.api import Safety -from llama_stack.agentic_system.api import * # noqa: F403 +from llama_stack.apis.inference import Inference +from llama_stack.apis.memory import Memory +from llama_stack.apis.safety import Safety +from llama_stack.apis.agents import * # noqa: F403 from llama_stack.tools.builtin import ( CodeInterpreterTool, PhotogenTool, @@ -33,7 +33,7 @@ logger.setLevel(logging.INFO) AGENT_INSTANCES_BY_ID = {} -class MetaReferenceAgenticSystemImpl(AgenticSystem): +class MetaReferenceAgentsImpl(Agents): def __init__( self, config: MetaReferenceImplConfig, @@ -49,10 +49,10 @@ class MetaReferenceAgenticSystemImpl(AgenticSystem): async def initialize(self) -> None: pass - async def create_agentic_system( + async def create_agent( self, agent_config: AgentConfig, - ) -> AgenticSystemCreateResponse: + ) -> AgentCreateResponse: agent_id = str(uuid.uuid4()) builtin_tools = [] @@ -95,24 +95,24 @@ class MetaReferenceAgenticSystemImpl(AgenticSystem): builtin_tools=builtin_tools, ) - return AgenticSystemCreateResponse( + return AgentCreateResponse( agent_id=agent_id, ) - async def create_agentic_system_session( + async def create_agent_session( self, agent_id: str, session_name: str, - ) -> AgenticSystemSessionCreateResponse: + ) -> AgentSessionCreateResponse: assert agent_id in AGENT_INSTANCES_BY_ID, f"System {agent_id} not found" agent = AGENT_INSTANCES_BY_ID[agent_id] session = agent.create_session(session_name) - return AgenticSystemSessionCreateResponse( + return AgentSessionCreateResponse( session_id=session.session_id, ) - async def create_agentic_system_turn( + async def create_agent_turn( self, agent_id: str, session_id: str, @@ -126,7 +126,7 @@ class MetaReferenceAgenticSystemImpl(AgenticSystem): stream: Optional[bool] = False, ) -> AsyncGenerator: # wrapper request to make it easier to pass around (internal only, not exposed to API) - request = AgenticSystemTurnCreateRequest( + request = AgentTurnCreateRequest( agent_id=agent_id, session_id=session_id, messages=messages, diff --git a/llama_stack/agentic_system/meta_reference/config.py b/llama_stack/providers/impls/meta_reference/agents/config.py similarity index 100% rename from llama_stack/agentic_system/meta_reference/config.py rename to llama_stack/providers/impls/meta_reference/agents/config.py diff --git a/llama_stack/agentic_system/meta_reference/rag/context_retriever.py b/llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py similarity index 95% rename from llama_stack/agentic_system/meta_reference/rag/context_retriever.py rename to llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py index 7723fe5a5..5ebb94a31 100644 --- a/llama_stack/agentic_system/meta_reference/rag/context_retriever.py +++ b/llama_stack/providers/impls/meta_reference/agents/rag/context_retriever.py @@ -10,14 +10,14 @@ from jinja2 import Template from llama_models.llama3.api import * # noqa: F403 -from llama_stack.agentic_system.api import ( +from llama_stack.apis.agents import ( DefaultMemoryQueryGeneratorConfig, LLMMemoryQueryGeneratorConfig, MemoryQueryGenerator, MemoryQueryGeneratorConfig, ) from termcolor import cprint # noqa: F401 -from llama_stack.inference.api import * # noqa: F403 +from llama_stack.apis.inference import * # noqa: F403 async def generate_rag_query( diff --git a/llama_stack/agentic_system/meta_reference/safety.py b/llama_stack/providers/impls/meta_reference/agents/safety.py similarity index 98% rename from llama_stack/agentic_system/meta_reference/safety.py rename to llama_stack/providers/impls/meta_reference/agents/safety.py index 74da5c2cc..f7148ddce 100644 --- a/llama_stack/agentic_system/meta_reference/safety.py +++ b/llama_stack/providers/impls/meta_reference/agents/safety.py @@ -7,15 +7,15 @@ from typing import List from llama_models.llama3.api.datatypes import Message, Role, UserMessage -from termcolor import cprint -from llama_stack.safety.api import ( +from llama_stack.apis.safety import ( OnViolationAction, RunShieldRequest, Safety, ShieldDefinition, ShieldResponse, ) +from termcolor import cprint class SafetyException(Exception): # noqa: N818 diff --git a/llama_stack/inference/meta_reference/__init__.py b/llama_stack/providers/impls/meta_reference/inference/__init__.py similarity index 100% rename from llama_stack/inference/meta_reference/__init__.py rename to llama_stack/providers/impls/meta_reference/inference/__init__.py diff --git a/llama_stack/inference/meta_reference/config.py b/llama_stack/providers/impls/meta_reference/inference/config.py similarity index 96% rename from llama_stack/inference/meta_reference/config.py rename to llama_stack/providers/impls/meta_reference/inference/config.py index 7da01a0f4..27943cb2c 100644 --- a/llama_stack/inference/meta_reference/config.py +++ b/llama_stack/providers/impls/meta_reference/inference/config.py @@ -11,9 +11,9 @@ from llama_models.datatypes import ModelFamily from llama_models.schema_utils import json_schema_type from llama_models.sku_list import all_registered_models, resolve_model -from pydantic import BaseModel, Field, field_validator +from llama_stack.apis.inference import QuantizationConfig -from llama_stack.inference.api import QuantizationConfig +from pydantic import BaseModel, Field, field_validator @json_schema_type diff --git a/llama_stack/inference/meta_reference/generation.py b/llama_stack/providers/impls/meta_reference/inference/generation.py similarity index 99% rename from llama_stack/inference/meta_reference/generation.py rename to llama_stack/providers/impls/meta_reference/inference/generation.py index f7b077f54..c525bc08c 100644 --- a/llama_stack/inference/meta_reference/generation.py +++ b/llama_stack/providers/impls/meta_reference/inference/generation.py @@ -28,10 +28,10 @@ from llama_models.llama3.api.datatypes import Message, ToolPromptFormat from llama_models.llama3.api.tokenizer import Tokenizer from llama_models.llama3.reference_impl.model import Transformer from llama_models.sku_list import resolve_model -from termcolor import cprint +from llama_stack.apis.inference import QuantizationType from llama_stack.common.model_utils import model_local_dir -from llama_stack.inference.api import QuantizationType +from termcolor import cprint from .config import MetaReferenceImplConfig diff --git a/llama_stack/inference/meta_reference/inference.py b/llama_stack/providers/impls/meta_reference/inference/inference.py similarity index 96% rename from llama_stack/inference/meta_reference/inference.py rename to llama_stack/providers/impls/meta_reference/inference/inference.py index 66a97f7fb..597a4cb55 100644 --- a/llama_stack/inference/meta_reference/inference.py +++ b/llama_stack/providers/impls/meta_reference/inference/inference.py @@ -11,7 +11,7 @@ from typing import AsyncIterator, Union from llama_models.llama3.api.datatypes import StopReason from llama_models.sku_list import resolve_model -from llama_stack.inference.api import ( +from llama_stack.apis.inference import ( ChatCompletionRequest, ChatCompletionResponse, ChatCompletionResponseEvent, @@ -21,13 +21,13 @@ from llama_stack.inference.api import ( ToolCallDelta, ToolCallParseStatus, ) -from llama_stack.inference.prepare_messages import prepare_messages +from llama_stack.providers.utils.inference.prepare_messages import prepare_messages from .config import MetaReferenceImplConfig from .model_parallel import LlamaModelParallelGenerator from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_stack.inference.api import * # noqa: F403 +from llama_stack.apis.inference import * # noqa: F403 # there's a single model parallel process running serving the model. for now, # we don't support multiple concurrent requests to this process. @@ -57,7 +57,7 @@ class MetaReferenceInferenceImpl(Inference): model: str, messages: List[Message], sampling_params: Optional[SamplingParams] = SamplingParams(), - tools: Optional[List[ToolDefinition]] = list(), + tools: Optional[List[ToolDefinition]] = None, tool_choice: Optional[ToolChoice] = ToolChoice.auto, tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json, stream: Optional[bool] = False, @@ -70,7 +70,7 @@ class MetaReferenceInferenceImpl(Inference): model=model, messages=messages, sampling_params=sampling_params, - tools=tools, + tools=tools or [], tool_choice=tool_choice, tool_prompt_format=tool_prompt_format, stream=stream, diff --git a/llama_stack/inference/meta_reference/model_parallel.py b/llama_stack/providers/impls/meta_reference/inference/model_parallel.py similarity index 100% rename from llama_stack/inference/meta_reference/model_parallel.py rename to llama_stack/providers/impls/meta_reference/inference/model_parallel.py diff --git a/llama_stack/inference/meta_reference/parallel_utils.py b/llama_stack/providers/impls/meta_reference/inference/parallel_utils.py similarity index 100% rename from llama_stack/inference/meta_reference/parallel_utils.py rename to llama_stack/providers/impls/meta_reference/inference/parallel_utils.py diff --git a/llama_stack/inference/quantization/fp8_impls.py b/llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py similarity index 100% rename from llama_stack/inference/quantization/fp8_impls.py rename to llama_stack/providers/impls/meta_reference/inference/quantization/fp8_impls.py diff --git a/llama_stack/inference/quantization/loader.py b/llama_stack/providers/impls/meta_reference/inference/quantization/loader.py similarity index 97% rename from llama_stack/inference/quantization/loader.py rename to llama_stack/providers/impls/meta_reference/inference/quantization/loader.py index 1b98bb9ad..9d28c9853 100644 --- a/llama_stack/inference/quantization/loader.py +++ b/llama_stack/providers/impls/meta_reference/inference/quantization/loader.py @@ -14,9 +14,9 @@ import torch from fairscale.nn.model_parallel.mappings import reduce_from_model_parallel_region from llama_models.llama3.api.model import Transformer, TransformerBlock -from llama_stack.inference.api import QuantizationType +from llama_stack.apis.inference import QuantizationType -from llama_stack.inference.api.config import ( +from llama_stack.apis.inference.config import ( CheckpointQuantizationFormat, MetaReferenceImplConfig, ) diff --git a/llama_stack/inference/quantization/scripts/build_conda.sh b/llama_stack/providers/impls/meta_reference/inference/quantization/scripts/build_conda.sh similarity index 100% rename from llama_stack/inference/quantization/scripts/build_conda.sh rename to llama_stack/providers/impls/meta_reference/inference/quantization/scripts/build_conda.sh diff --git a/llama_stack/inference/quantization/scripts/quantize_checkpoint.py b/llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py similarity index 100% rename from llama_stack/inference/quantization/scripts/quantize_checkpoint.py rename to llama_stack/providers/impls/meta_reference/inference/quantization/scripts/quantize_checkpoint.py diff --git a/llama_stack/inference/quantization/scripts/run_quantize_checkpoint.sh b/llama_stack/providers/impls/meta_reference/inference/quantization/scripts/run_quantize_checkpoint.sh similarity index 100% rename from llama_stack/inference/quantization/scripts/run_quantize_checkpoint.sh rename to llama_stack/providers/impls/meta_reference/inference/quantization/scripts/run_quantize_checkpoint.sh diff --git a/llama_stack/inference/quantization/test_fp8.py b/llama_stack/providers/impls/meta_reference/inference/quantization/test_fp8.py similarity index 100% rename from llama_stack/inference/quantization/test_fp8.py rename to llama_stack/providers/impls/meta_reference/inference/quantization/test_fp8.py diff --git a/llama_stack/memory/meta_reference/faiss/__init__.py b/llama_stack/providers/impls/meta_reference/memory/__init__.py similarity index 100% rename from llama_stack/memory/meta_reference/faiss/__init__.py rename to llama_stack/providers/impls/meta_reference/memory/__init__.py diff --git a/llama_stack/memory/meta_reference/faiss/config.py b/llama_stack/providers/impls/meta_reference/memory/config.py similarity index 100% rename from llama_stack/memory/meta_reference/faiss/config.py rename to llama_stack/providers/impls/meta_reference/memory/config.py diff --git a/llama_stack/memory/meta_reference/faiss/faiss.py b/llama_stack/providers/impls/meta_reference/memory/faiss.py similarity index 95% rename from llama_stack/memory/meta_reference/faiss/faiss.py rename to llama_stack/providers/impls/meta_reference/memory/faiss.py index dec8bd2df..ee716430e 100644 --- a/llama_stack/memory/meta_reference/faiss/faiss.py +++ b/llama_stack/providers/impls/meta_reference/memory/faiss.py @@ -15,13 +15,14 @@ from numpy.typing import NDArray from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_stack.memory.api import * # noqa: F403 -from llama_stack.memory.common.vector_store import ( +from llama_stack.apis.memory import * # noqa: F403 +from llama_stack.providers.utils.memory.vector_store import ( ALL_MINILM_L6_V2_DIMENSION, BankWithIndex, EmbeddingIndex, ) -from llama_stack.telemetry import tracing +from llama_stack.providers.utils.telemetry import tracing + from .config import FaissImplConfig logger = logging.getLogger(__name__) diff --git a/llama_stack/safety/meta_reference/__init__.py b/llama_stack/providers/impls/meta_reference/safety/__init__.py similarity index 100% rename from llama_stack/safety/meta_reference/__init__.py rename to llama_stack/providers/impls/meta_reference/safety/__init__.py diff --git a/llama_stack/safety/meta_reference/config.py b/llama_stack/providers/impls/meta_reference/safety/config.py similarity index 100% rename from llama_stack/safety/meta_reference/config.py rename to llama_stack/providers/impls/meta_reference/safety/config.py diff --git a/llama_stack/safety/meta_reference/safety.py b/llama_stack/providers/impls/meta_reference/safety/safety.py similarity index 98% rename from llama_stack/safety/meta_reference/safety.py rename to llama_stack/providers/impls/meta_reference/safety/safety.py index 209b2975b..01899130b 100644 --- a/llama_stack/safety/meta_reference/safety.py +++ b/llama_stack/providers/impls/meta_reference/safety/safety.py @@ -9,7 +9,7 @@ import asyncio from llama_models.sku_list import resolve_model from llama_stack.common.model_utils import model_local_dir -from llama_stack.safety.api import * # noqa +from llama_stack.apis.safety import * # noqa from .config import SafetyConfig from .shields import ( diff --git a/llama_stack/safety/meta_reference/shields/__init__.py b/llama_stack/providers/impls/meta_reference/safety/shields/__init__.py similarity index 100% rename from llama_stack/safety/meta_reference/shields/__init__.py rename to llama_stack/providers/impls/meta_reference/safety/shields/__init__.py diff --git a/llama_stack/safety/meta_reference/shields/base.py b/llama_stack/providers/impls/meta_reference/safety/shields/base.py similarity index 97% rename from llama_stack/safety/meta_reference/shields/base.py rename to llama_stack/providers/impls/meta_reference/safety/shields/base.py index d6480259a..64e64e2fd 100644 --- a/llama_stack/safety/meta_reference/shields/base.py +++ b/llama_stack/providers/impls/meta_reference/safety/shields/base.py @@ -8,7 +8,7 @@ from abc import ABC, abstractmethod from typing import List from llama_models.llama3.api.datatypes import interleaved_text_media_as_str, Message -from llama_stack.safety.api import * # noqa: F403 +from llama_stack.apis.safety import * # noqa: F403 CANNED_RESPONSE_TEXT = "I can't answer that. Can I help with something else?" diff --git a/llama_stack/safety/meta_reference/shields/code_scanner.py b/llama_stack/providers/impls/meta_reference/safety/shields/code_scanner.py similarity index 95% rename from llama_stack/safety/meta_reference/shields/code_scanner.py rename to llama_stack/providers/impls/meta_reference/safety/shields/code_scanner.py index 32f52a6dc..75ec7c37b 100644 --- a/llama_stack/safety/meta_reference/shields/code_scanner.py +++ b/llama_stack/providers/impls/meta_reference/safety/shields/code_scanner.py @@ -8,7 +8,7 @@ from codeshield.cs import CodeShield from termcolor import cprint from .base import ShieldResponse, TextShield -from llama_stack.safety.api import * # noqa: F403 +from llama_stack.apis.safety import * # noqa: F403 class CodeScannerShield(TextShield): diff --git a/llama_stack/memory/common/__init__.py b/llama_stack/providers/impls/meta_reference/safety/shields/contrib/__init__.py similarity index 100% rename from llama_stack/memory/common/__init__.py rename to llama_stack/providers/impls/meta_reference/safety/shields/contrib/__init__.py diff --git a/llama_stack/safety/meta_reference/shields/contrib/third_party_shield.py b/llama_stack/providers/impls/meta_reference/safety/shields/contrib/third_party_shield.py similarity index 100% rename from llama_stack/safety/meta_reference/shields/contrib/third_party_shield.py rename to llama_stack/providers/impls/meta_reference/safety/shields/contrib/third_party_shield.py diff --git a/llama_stack/safety/meta_reference/shields/llama_guard.py b/llama_stack/providers/impls/meta_reference/safety/shields/llama_guard.py similarity index 99% rename from llama_stack/safety/meta_reference/shields/llama_guard.py rename to llama_stack/providers/impls/meta_reference/safety/shields/llama_guard.py index 3cdfeac13..c5c4f58a6 100644 --- a/llama_stack/safety/meta_reference/shields/llama_guard.py +++ b/llama_stack/providers/impls/meta_reference/safety/shields/llama_guard.py @@ -14,7 +14,7 @@ from llama_models.llama3.api.datatypes import Message, Role from transformers import AutoModelForCausalLM, AutoTokenizer from .base import CANNED_RESPONSE_TEXT, OnViolationAction, ShieldBase, ShieldResponse -from llama_stack.safety.api import * # noqa: F403 +from llama_stack.apis.safety import * # noqa: F403 SAFE_RESPONSE = "safe" _INSTANCE = None diff --git a/llama_stack/safety/meta_reference/shields/prompt_guard.py b/llama_stack/providers/impls/meta_reference/safety/shields/prompt_guard.py similarity index 99% rename from llama_stack/safety/meta_reference/shields/prompt_guard.py rename to llama_stack/providers/impls/meta_reference/safety/shields/prompt_guard.py index 2e5683a3d..67bc6a6db 100644 --- a/llama_stack/safety/meta_reference/shields/prompt_guard.py +++ b/llama_stack/providers/impls/meta_reference/safety/shields/prompt_guard.py @@ -14,7 +14,7 @@ from termcolor import cprint from transformers import AutoModelForSequenceClassification, AutoTokenizer from .base import message_content_as_str, OnViolationAction, ShieldResponse, TextShield -from llama_stack.safety.api import * # noqa: F403 +from llama_stack.apis.safety import * # noqa: F403 class PromptGuardShield(TextShield): diff --git a/llama_stack/telemetry/console/__init__.py b/llama_stack/providers/impls/meta_reference/telemetry/__init__.py similarity index 100% rename from llama_stack/telemetry/console/__init__.py rename to llama_stack/providers/impls/meta_reference/telemetry/__init__.py diff --git a/llama_stack/telemetry/console/config.py b/llama_stack/providers/impls/meta_reference/telemetry/config.py similarity index 100% rename from llama_stack/telemetry/console/config.py rename to llama_stack/providers/impls/meta_reference/telemetry/config.py diff --git a/llama_stack/telemetry/console/console.py b/llama_stack/providers/impls/meta_reference/telemetry/console.py similarity index 97% rename from llama_stack/telemetry/console/console.py rename to llama_stack/providers/impls/meta_reference/telemetry/console.py index 9b855818e..b56c704a6 100644 --- a/llama_stack/telemetry/console/console.py +++ b/llama_stack/providers/impls/meta_reference/telemetry/console.py @@ -6,7 +6,7 @@ from typing import Optional -from llama_stack.telemetry.api import * # noqa: F403 +from llama_stack.apis.telemetry import * # noqa: F403 from .config import ConsoleConfig diff --git a/llama_stack/memory/meta_reference/__init__.py b/llama_stack/providers/registry/__init__.py similarity index 100% rename from llama_stack/memory/meta_reference/__init__.py rename to llama_stack/providers/registry/__init__.py diff --git a/llama_stack/agentic_system/providers.py b/llama_stack/providers/registry/agents.py similarity index 80% rename from llama_stack/agentic_system/providers.py rename to llama_stack/providers/registry/agents.py index 7db33c95d..acee340a8 100644 --- a/llama_stack/agentic_system/providers.py +++ b/llama_stack/providers/registry/agents.py @@ -12,7 +12,7 @@ from llama_stack.core.datatypes import Api, InlineProviderSpec, ProviderSpec def available_providers() -> List[ProviderSpec]: return [ InlineProviderSpec( - api=Api.agentic_system, + api=Api.agents, provider_id="meta-reference", pip_packages=[ "codeshield", @@ -23,8 +23,8 @@ def available_providers() -> List[ProviderSpec]: "torch", "transformers", ], - module="llama_stack.agentic_system.meta_reference", - config_class="llama_stack.agentic_system.meta_reference.MetaReferenceImplConfig", + module="llama_stack.providers.impls.meta_reference.agents", + config_class="llama_stack.providers.impls.meta_reference.agents.MetaReferenceImplConfig", api_dependencies=[ Api.inference, Api.safety, diff --git a/llama_stack/inference/providers.py b/llama_stack/providers/registry/inference.py similarity index 67% rename from llama_stack/inference/providers.py rename to llama_stack/providers/registry/inference.py index 069cda1d2..202a316e6 100644 --- a/llama_stack/inference/providers.py +++ b/llama_stack/providers/registry/inference.py @@ -24,15 +24,15 @@ def available_providers() -> List[ProviderSpec]: "transformers", "zmq", ], - module="llama_stack.inference.meta_reference", - config_class="llama_stack.inference.meta_reference.MetaReferenceImplConfig", + module="llama_stack.providers.impls.meta_reference.inference", + config_class="llama_stack.providers.impls.meta_reference.inference.MetaReferenceImplConfig", ), remote_provider_spec( api=Api.inference, adapter=AdapterSpec( adapter_id="ollama", pip_packages=["ollama"], - module="llama_stack.inference.adapters.ollama", + module="llama_stack.providers.adapters.inference.ollama", ), ), remote_provider_spec( @@ -40,8 +40,8 @@ def available_providers() -> List[ProviderSpec]: adapter=AdapterSpec( adapter_id="tgi", pip_packages=["huggingface_hub"], - module="llama_stack.inference.adapters.tgi", - config_class="llama_stack.inference.adapters.tgi.TGIImplConfig", + module="llama_stack.providers.adapters.inference.tgi", + config_class="llama_stack.providers.adapters.inference.tgi.TGIImplConfig", ), ), remote_provider_spec( @@ -51,8 +51,8 @@ def available_providers() -> List[ProviderSpec]: pip_packages=[ "fireworks-ai", ], - module="llama_stack.inference.adapters.fireworks", - config_class="llama_stack.inference.adapters.fireworks.FireworksImplConfig", + module="llama_stack.providers.adapters.inference.fireworks", + config_class="llama_stack.providers.adapters.inference.fireworks.FireworksImplConfig", ), ), remote_provider_spec( @@ -62,8 +62,8 @@ def available_providers() -> List[ProviderSpec]: pip_packages=[ "together", ], - module="llama_stack.inference.adapters.together", - config_class="llama_stack.inference.adapters.together.TogetherImplConfig", + module="llama_stack.providers.adapters.inference.together", + config_class="llama_stack.providers.adapters.inference.together.TogetherImplConfig", ), ), ] diff --git a/llama_stack/memory/providers.py b/llama_stack/providers/registry/memory.py similarity index 68% rename from llama_stack/memory/providers.py rename to llama_stack/providers/registry/memory.py index bffd9eb64..02f4958f1 100644 --- a/llama_stack/memory/providers.py +++ b/llama_stack/providers/registry/memory.py @@ -20,17 +20,17 @@ def available_providers() -> List[ProviderSpec]: return [ InlineProviderSpec( api=Api.memory, - provider_id="meta-reference-faiss", + provider_id="meta-reference", pip_packages=EMBEDDING_DEPS + ["faiss-cpu"], - module="llama_stack.memory.meta_reference.faiss", - config_class="llama_stack.memory.meta_reference.faiss.FaissImplConfig", + module="llama_stack.providers.impls.meta_reference.memory", + config_class="llama_stack.providers.impls.meta_reference.memory.FaissImplConfig", ), remote_provider_spec( Api.memory, AdapterSpec( adapter_id="chromadb", pip_packages=EMBEDDING_DEPS + ["chromadb-client"], - module="llama_stack.memory.adapters.chroma", + module="llama_stack.providers.adapters.memory.chroma", ), ), remote_provider_spec( @@ -38,8 +38,8 @@ def available_providers() -> List[ProviderSpec]: AdapterSpec( adapter_id="pgvector", pip_packages=EMBEDDING_DEPS + ["psycopg2-binary"], - module="llama_stack.memory.adapters.pgvector", - config_class="llama_stack.memory.adapters.pgvector.PGVectorConfig", + module="llama_stack.providers.adapters.memory.pgvector", + config_class="llama_stack.providers.adapters.memory.pgvector.PGVectorConfig", ), ), ] diff --git a/llama_stack/safety/providers.py b/llama_stack/providers/registry/safety.py similarity index 79% rename from llama_stack/safety/providers.py rename to llama_stack/providers/registry/safety.py index 3fb653a34..764af258e 100644 --- a/llama_stack/safety/providers.py +++ b/llama_stack/providers/registry/safety.py @@ -20,7 +20,7 @@ def available_providers() -> List[ProviderSpec]: "torch", "transformers", ], - module="llama_stack.safety.meta_reference", - config_class="llama_stack.safety.meta_reference.SafetyConfig", + module="llama_stack.providers.impls.meta_reference.safety", + config_class="llama_stack.providers.impls.meta_reference.safety.SafetyConfig", ), ] diff --git a/llama_stack/telemetry/providers.py b/llama_stack/providers/registry/telemetry.py similarity index 67% rename from llama_stack/telemetry/providers.py rename to llama_stack/providers/registry/telemetry.py index 1359dbbbd..0199666da 100644 --- a/llama_stack/telemetry/providers.py +++ b/llama_stack/providers/registry/telemetry.py @@ -13,9 +13,9 @@ def available_providers() -> List[ProviderSpec]: return [ InlineProviderSpec( api=Api.telemetry, - provider_id="console", + provider_id="meta-reference", pip_packages=[], - module="llama_stack.telemetry.console", - config_class="llama_stack.telemetry.console.ConsoleConfig", + module="llama_stack.providers.impls.meta_reference.telemetry", + config_class="llama_stack.providers.impls.meta_reference.telemetry.ConsoleConfig", ), ] diff --git a/llama_stack/safety/__init__.py b/llama_stack/providers/routers/__init__.py similarity index 100% rename from llama_stack/safety/__init__.py rename to llama_stack/providers/routers/__init__.py diff --git a/llama_stack/memory/router/__init__.py b/llama_stack/providers/routers/memory/__init__.py similarity index 91% rename from llama_stack/memory/router/__init__.py rename to llama_stack/providers/routers/memory/__init__.py index 828be53a8..4843d688b 100644 --- a/llama_stack/memory/router/__init__.py +++ b/llama_stack/providers/routers/memory/__init__.py @@ -10,7 +10,7 @@ from llama_stack.core.datatypes import Api async def get_router_impl(inner_impls: List[Tuple[str, Any]], deps: List[Api]): - from .router import MemoryRouterImpl + from .memory import MemoryRouterImpl impl = MemoryRouterImpl(inner_impls, deps) await impl.initialize() diff --git a/llama_stack/memory/router/router.py b/llama_stack/providers/routers/memory/memory.py similarity index 98% rename from llama_stack/memory/router/router.py rename to llama_stack/providers/routers/memory/memory.py index c63d85d66..85c284e2f 100644 --- a/llama_stack/memory/router/router.py +++ b/llama_stack/providers/routers/memory/memory.py @@ -7,7 +7,7 @@ from typing import Any, Dict, List, Tuple from llama_stack.core.datatypes import Api -from llama_stack.memory.api import * # noqa: F403 +from llama_stack.apis.memory import * # noqa: F403 class MemoryRouterImpl(Memory): diff --git a/llama_stack/safety/meta_reference/shields/contrib/__init__.py b/llama_stack/providers/utils/__init__.py similarity index 100% rename from llama_stack/safety/meta_reference/shields/contrib/__init__.py rename to llama_stack/providers/utils/__init__.py diff --git a/llama_stack/telemetry/__init__.py b/llama_stack/providers/utils/agents/__init__.py similarity index 100% rename from llama_stack/telemetry/__init__.py rename to llama_stack/providers/utils/agents/__init__.py diff --git a/llama_stack/agentic_system/event_logger.py b/llama_stack/providers/utils/agents/event_logger.py similarity index 98% rename from llama_stack/agentic_system/event_logger.py rename to llama_stack/providers/utils/agents/event_logger.py index c0bd89ee2..1d3f2a68a 100644 --- a/llama_stack/agentic_system/event_logger.py +++ b/llama_stack/providers/utils/agents/event_logger.py @@ -11,7 +11,7 @@ from llama_models.llama3.api.tool_utils import ToolUtils from termcolor import cprint -from llama_stack.agentic_system.api import AgenticSystemTurnResponseEventType, StepType +from llama_stack.apis.agents import AgentTurnResponseEventType, StepType class LogEvent: @@ -37,7 +37,7 @@ class LogEvent: cprint(f"{str(self)}", color=self.color, end=self.end, flush=flush) -EventType = AgenticSystemTurnResponseEventType +EventType = AgentTurnResponseEventType class EventLogger: diff --git a/llama_stack/agentic_system/execute_with_custom_tools.py b/llama_stack/providers/utils/agents/execute_with_custom_tools.py similarity index 86% rename from llama_stack/agentic_system/execute_with_custom_tools.py rename to llama_stack/providers/utils/agents/execute_with_custom_tools.py index fe9ef17b3..928d444ca 100644 --- a/llama_stack/agentic_system/execute_with_custom_tools.py +++ b/llama_stack/providers/utils/agents/execute_with_custom_tools.py @@ -7,20 +7,18 @@ from typing import AsyncGenerator, List from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_stack.agentic_system.api import * # noqa: F403 -from llama_stack.memory.api import * # noqa: F403 -from llama_stack.safety.api import * # noqa: F403 +from llama_stack.apis.agents import * # noqa: F403 +from llama_stack.apis.memory import * # noqa: F403 +from llama_stack.apis.safety import * # noqa: F403 -from llama_stack.agentic_system.api import ( - AgenticSystemTurnResponseEventType as EventType, -) +from llama_stack.apis.agents import AgentTurnResponseEventType as EventType from llama_stack.tools.custom.datatypes import CustomTool class AgentWithCustomToolExecutor: def __init__( self, - api: AgenticSystem, + api: Agents, agent_id: str, session_id: str, agent_config: AgentConfig, @@ -46,7 +44,7 @@ class AgentWithCustomToolExecutor: while n_iter < max_iters: n_iter += 1 - request = AgenticSystemTurnCreateRequest( + request = AgentTurnCreateRequest( agent_id=self.agent_id, session_id=self.session_id, messages=current_messages, @@ -55,7 +53,7 @@ class AgentWithCustomToolExecutor: ) turn = None - async for chunk in self.api.create_agentic_system_turn(request): + async for chunk in self.api.create_agent_turn(request): if chunk.event.payload.event_type != EventType.turn_complete.value: yield chunk else: diff --git a/llama_stack/agentic_system/api/__init__.py b/llama_stack/providers/utils/inference/__init__.py similarity index 83% rename from llama_stack/agentic_system/api/__init__.py rename to llama_stack/providers/utils/inference/__init__.py index a7e55ba91..756f351d8 100644 --- a/llama_stack/agentic_system/api/__init__.py +++ b/llama_stack/providers/utils/inference/__init__.py @@ -3,5 +3,3 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. - -from .api import * # noqa: F401 F403 diff --git a/llama_stack/inference/event_logger.py b/llama_stack/providers/utils/inference/event_logger.py similarity index 97% rename from llama_stack/inference/event_logger.py rename to llama_stack/providers/utils/inference/event_logger.py index 6e27998a5..c64ffb6bd 100644 --- a/llama_stack/inference/event_logger.py +++ b/llama_stack/providers/utils/inference/event_logger.py @@ -4,12 +4,11 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from termcolor import cprint - -from llama_stack.inference.api import ( +from llama_stack.apis.inference import ( ChatCompletionResponseEventType, ChatCompletionResponseStreamChunk, ) +from termcolor import cprint class LogEvent: diff --git a/llama_stack/inference/prepare_messages.py b/llama_stack/providers/utils/inference/prepare_messages.py similarity index 98% rename from llama_stack/inference/prepare_messages.py rename to llama_stack/providers/utils/inference/prepare_messages.py index ef818daa1..0519cbfab 100644 --- a/llama_stack/inference/prepare_messages.py +++ b/llama_stack/providers/utils/inference/prepare_messages.py @@ -5,7 +5,7 @@ # the root directory of this source tree. from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_stack.inference.api import * # noqa: F403 +from llama_stack.apis.inference import * # noqa: F403 from llama_models.llama3.prompt_templates import ( BuiltinToolGenerator, FunctionTagCustomToolGenerator, diff --git a/llama_stack/evaluations/api/__init__.py b/llama_stack/providers/utils/memory/__init__.py similarity index 83% rename from llama_stack/evaluations/api/__init__.py rename to llama_stack/providers/utils/memory/__init__.py index a7e55ba91..756f351d8 100644 --- a/llama_stack/evaluations/api/__init__.py +++ b/llama_stack/providers/utils/memory/__init__.py @@ -3,5 +3,3 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. - -from .api import * # noqa: F401 F403 diff --git a/llama_stack/memory/common/file_utils.py b/llama_stack/providers/utils/memory/file_utils.py similarity index 100% rename from llama_stack/memory/common/file_utils.py rename to llama_stack/providers/utils/memory/file_utils.py diff --git a/llama_stack/memory/common/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py similarity index 99% rename from llama_stack/memory/common/vector_store.py rename to llama_stack/providers/utils/memory/vector_store.py index aca3fd13d..d575a985b 100644 --- a/llama_stack/memory/common/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -20,7 +20,7 @@ from pypdf import PdfReader from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_models.llama3.api.tokenizer import Tokenizer -from llama_stack.memory.api import * # noqa: F403 +from llama_stack.apis.memory import * # noqa: F403 ALL_MINILM_L6_V2_DIMENSION = 384 diff --git a/llama_stack/batch_inference/api/__init__.py b/llama_stack/providers/utils/telemetry/__init__.py similarity index 83% rename from llama_stack/batch_inference/api/__init__.py rename to llama_stack/providers/utils/telemetry/__init__.py index a7e55ba91..756f351d8 100644 --- a/llama_stack/batch_inference/api/__init__.py +++ b/llama_stack/providers/utils/telemetry/__init__.py @@ -3,5 +3,3 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. - -from .api import * # noqa: F401 F403 diff --git a/llama_stack/telemetry/tracing.py b/llama_stack/providers/utils/telemetry/tracing.py similarity index 99% rename from llama_stack/telemetry/tracing.py rename to llama_stack/providers/utils/telemetry/tracing.py index 8375ff32c..5284dfac0 100644 --- a/llama_stack/telemetry/tracing.py +++ b/llama_stack/providers/utils/telemetry/tracing.py @@ -15,7 +15,7 @@ from functools import wraps from typing import Any, Dict, List -from llama_stack.telemetry.api import * # noqa: F403 +from llama_stack.apis.telemetry import * # noqa: F403 def generate_short_uuid(len: int = 12): diff --git a/llama_stack/reward_scoring/api/__init__.py b/llama_stack/reward_scoring/api/__init__.py deleted file mode 100644 index a7e55ba91..000000000 --- a/llama_stack/reward_scoring/api/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .api import * # noqa: F401 F403 diff --git a/llama_stack/safety/api/__init__.py b/llama_stack/safety/api/__init__.py deleted file mode 100644 index a7e55ba91..000000000 --- a/llama_stack/safety/api/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .api import * # noqa: F401 F403 diff --git a/llama_stack/stack.py b/llama_stack/stack.py index 69ce8bcd1..f972edc1e 100644 --- a/llama_stack/stack.py +++ b/llama_stack/stack.py @@ -5,23 +5,23 @@ # the root directory of this source tree. from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_stack.agentic_system.api import * # noqa: F403 -from llama_stack.dataset.api import * # noqa: F403 +from llama_stack.apis.agents import * # noqa: F403 +from llama_stack.apis.dataset import * # noqa: F403 from llama_stack.evaluations.api import * # noqa: F403 -from llama_stack.inference.api import * # noqa: F403 -from llama_stack.batch_inference.api import * # noqa: F403 -from llama_stack.memory.api import * # noqa: F403 -from llama_stack.telemetry.api import * # noqa: F403 -from llama_stack.post_training.api import * # noqa: F403 -from llama_stack.reward_scoring.api import * # noqa: F403 -from llama_stack.synthetic_data_generation.api import * # noqa: F403 -from llama_stack.safety.api import * # noqa: F403 +from llama_stack.apis.inference import * # noqa: F403 +from llama_stack.apis.batch_inference import * # noqa: F403 +from llama_stack.apis.memory import * # noqa: F403 +from llama_stack.apis.telemetry import * # noqa: F403 +from llama_stack.apis.post_training import * # noqa: F403 +from llama_stack.apis.reward_scoring import * # noqa: F403 +from llama_stack.apis.synthetic_data_generation import * # noqa: F403 +from llama_stack.apis.safety import * # noqa: F403 class LlamaStack( Inference, BatchInference, - AgenticSystem, + Agents, RewardScoring, Safety, SyntheticDataGeneration, diff --git a/llama_stack/synthetic_data_generation/api/__init__.py b/llama_stack/synthetic_data_generation/api/__init__.py deleted file mode 100644 index a7e55ba91..000000000 --- a/llama_stack/synthetic_data_generation/api/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .api import * # noqa: F401 F403 diff --git a/llama_stack/telemetry/api/__init__.py b/llama_stack/telemetry/api/__init__.py deleted file mode 100644 index a7e55ba91..000000000 --- a/llama_stack/telemetry/api/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from .api import * # noqa: F401 F403 diff --git a/llama_stack/tools/base.py b/llama_stack/tools/base.py index f81085a15..15fba7e2e 100644 --- a/llama_stack/tools/base.py +++ b/llama_stack/tools/base.py @@ -7,7 +7,7 @@ from abc import ABC, abstractmethod from typing import List -from llama_stack.inference.api import Message +from llama_stack.apis.inference import Message class BaseTool(ABC): diff --git a/llama_stack/tools/builtin.py b/llama_stack/tools/builtin.py index 45ac97b88..4c9cdfcd2 100644 --- a/llama_stack/tools/builtin.py +++ b/llama_stack/tools/builtin.py @@ -21,8 +21,8 @@ from .ipython_tool.code_execution import ( TOOLS_ATTACHMENT_KEY_REGEX, ) -from llama_stack.inference.api import * # noqa: F403 -from llama_stack.agentic_system.api import * # noqa: F403 +from llama_stack.apis.inference import * # noqa: F403 +from llama_stack.apis.agents import * # noqa: F403 from .base import BaseTool diff --git a/llama_stack/tools/custom/datatypes.py b/llama_stack/tools/custom/datatypes.py index 8ae1d678d..c8dacefa3 100644 --- a/llama_stack/tools/custom/datatypes.py +++ b/llama_stack/tools/custom/datatypes.py @@ -10,7 +10,7 @@ from abc import abstractmethod from typing import Dict, List from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_stack.agentic_system.api import * # noqa: F403 +from llama_stack.apis.agents import * # noqa: F403 class CustomTool: diff --git a/llama_stack/tools/safety.py b/llama_stack/tools/safety.py index 5a33bbadb..d36dc3490 100644 --- a/llama_stack/tools/safety.py +++ b/llama_stack/tools/safety.py @@ -6,10 +6,10 @@ from typing import List -from llama_stack.agentic_system.meta_reference.safety import ShieldRunnerMixin +from llama_stack.apis.inference import Message +from llama_stack.apis.safety import Safety, ShieldDefinition -from llama_stack.inference.api import Message -from llama_stack.safety.api import Safety, ShieldDefinition +from llama_stack.providers.impls.meta_reference.agents.safety import ShieldRunnerMixin from .builtin import BaseTool