Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-30 07:39:38 +00:00)

commit 2cf731faea (parent f372355409)

    llama_toolchain -> llama_stack

175 changed files with 300 additions and 279 deletions
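This commit is a mechanical package rename: every `llama_toolchain.*` module path, resource path, and config reference becomes `llama_stack.*`, as the hunks below show. For downstream code that still imports the old name, a bulk rewrite along the lines of the sketch below tracks the change; this is an illustrative script, not something shipped in this commit, and the `src/` root is a placeholder.

```
# Illustrative sketch (not part of this commit): rewrite llama_toolchain
# references to llama_stack across a project tree. "src" is a placeholder root.
import re
from pathlib import Path

OLD, NEW = "llama_toolchain", "llama_stack"
pattern = re.compile(rf"\b{OLD}\b")  # matches imports and dotted module strings

for path in Path("src").rglob("*.py"):
    text = path.read_text()
    updated = pattern.sub(NEW, text)
    if updated != text:
        path.write_text(updated)
        print(f"updated {path}")
```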
@@ -1,4 +1,4 @@
 include requirements.txt
-include llama_toolchain/data/*.yaml
-include llama_toolchain/core/*.sh
-include llama_toolchain/cli/scripts/*.sh
+include llama_stack/data/*.yaml
+include llama_stack/core/*.sh
+include llama_stack/cli/scripts/*.sh
@@ -276,16 +276,16 @@ The following command and specifications allows you to get started with building
 ```
 llama stack build <path/to/config>
 ```
-- You will be required to pass in a file path to the build.config file (e.g. `./llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml`). We provide some example build config files for configuring different types of distributions in the `./llama_toolchain/configs/distributions/` folder.
+- You will be required to pass in a file path to the build.config file (e.g. `./llama_stack/configs/distributions/conda/local-conda-example-build.yaml`). We provide some example build config files for configuring different types of distributions in the `./llama_stack/configs/distributions/` folder.

 The file will be of the contents
 ```
-$ cat ./llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml
+$ cat ./llama_stack/configs/distributions/conda/local-conda-example-build.yaml

 name: 8b-instruct
 distribution_spec:
   distribution_type: local
-  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
   docker_image: null
   providers:
     inference: meta-reference
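Since the build file above is plain YAML, it can also be inspected outside the CLI. The snippet below is only an illustrative reader, not part of `llama stack build`; it assumes PyYAML is available and that the file follows the `name` / `distribution_spec` / `providers` layout shown in the example.

```
# Illustrative only: peek at a <name>-build.yaml like the example above.
# Assumes PyYAML is installed and the layout matches the documented example.
import yaml

path = "./llama_stack/configs/distributions/conda/local-conda-example-build.yaml"
with open(path) as f:
    build = yaml.safe_load(f)

spec = build["distribution_spec"]
print(f"name: {build['name']}")
print(f"description: {spec['description']}")
for api, provider in spec.get("providers", {}).items():
    print(f"  {api}: {provider}")
```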
@@ -311,7 +311,7 @@ After this step is complete, a file named `8b-instruct-build.yaml` will be gener
 To specify a different API provider, we can change the `distribution_spec` in our `<name>-build.yaml` config. For example, the following build spec allows you to build a distribution using TGI as the inference API provider.

 ```
-$ cat ./llama_toolchain/configs/distributions/conda/local-tgi-conda-example-build.yaml
+$ cat ./llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml

 name: local-tgi-conda-example
 distribution_spec:
@@ -328,7 +328,7 @@ image_type: conda

 The following command allows you to build a distribution with TGI as the inference API provider, with the name `tgi`.
 ```
-llama stack build --config ./llama_toolchain/configs/distributions/conda/local-tgi-conda-example-build.yaml --name tgi
+llama stack build --config ./llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml --name tgi
 ```

 We provide some example build configs to help you get started with building with different API providers.
@@ -337,11 +337,11 @@ We provide some example build configs to help you get started with building with
 To build a docker image, simply change the `image_type` to `docker` in our `<name>-build.yaml` file, and run `llama stack build --config <name>-build.yaml`.

 ```
-$ cat ./llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml
+$ cat ./llama_stack/configs/distributions/docker/local-docker-example-build.yaml

 name: local-docker-example
 distribution_spec:
-  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
   docker_image: null
   providers:
     inference: meta-reference
@@ -354,7 +354,7 @@ image_type: docker

 The following command allows you to build a Docker image with the name `docker-local`
 ```
-llama stack build --config ./llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml --name docker-local
+llama stack build --config ./llama_stack/configs/distributions/docker/local-docker-example-build.yaml --name docker-local

 Dockerfile created successfully in /tmp/tmp.I0ifS2c46A/DockerfileFROM python:3.10-slim
 WORKDIR /app
@@ -482,7 +482,7 @@ Once the server is setup, we can test it with a client to see the example output
 cd /path/to/llama-stack
 conda activate <env> # any environment containing the llama-toolchain pip package will work

-python -m llama_toolchain.inference.client localhost 5000
+python -m llama_stack.inference.client localhost 5000
 ```

 This will run the chat completion client and query the distribution’s /inference/chat_completion API.
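The client module above drives the `/inference/chat_completion` endpoint for you. If you want to poke the server with a raw HTTP request instead, a call shaped roughly like the sketch below should work; note that the JSON payload here is an assumption for illustration (field names are not taken verbatim from the repository), so prefer the bundled client for anything beyond a smoke test.

```
# Rough sketch of calling /inference/chat_completion directly over HTTP.
# The payload shape and model name are illustrative assumptions; the supported
# path is `python -m llama_stack.inference.client localhost 5000`.
import httpx

payload = {
    "model": "Meta-Llama3.1-8B-Instruct",  # placeholder model identifier
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": False,
}

resp = httpx.post(
    "http://localhost:5000/inference/chat_completion",
    json=payload,
    timeout=60,
)
resp.raise_for_status()
print(resp.json())
```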
@@ -500,7 +500,7 @@ You know what's even more hilarious? People like you who think they can just Goo
 Similarly you can test safety (if you configured llama-guard and/or prompt-guard shields) by:

 ```
-python -m llama_toolchain.safety.client localhost 5000
+python -m llama_stack.safety.client localhost 5000
 ```

 You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/sdk_examples) repo.
@@ -9,7 +9,7 @@ This guides allows you to quickly get started with building and running a Llama

 **`llama stack build`**
 ```
-llama stack build --config ./llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml --name my-local-llama-stack
+llama stack build --config ./llama_stack/configs/distributions/conda/local-conda-example-build.yaml --name my-local-llama-stack
 ...
 ...
 Build spec configuration saved at ~/.llama/distributions/conda/my-local-llama-stack-build.yaml
@@ -97,16 +97,16 @@ The following command and specifications allows you to get started with building
 ```
 llama stack build <path/to/config>
 ```
-- You will be required to pass in a file path to the build.config file (e.g. `./llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml`). We provide some example build config files for configuring different types of distributions in the `./llama_toolchain/configs/distributions/` folder.
+- You will be required to pass in a file path to the build.config file (e.g. `./llama_stack/configs/distributions/conda/local-conda-example-build.yaml`). We provide some example build config files for configuring different types of distributions in the `./llama_stack/configs/distributions/` folder.

 The file will be of the contents
 ```
-$ cat ./llama_toolchain/configs/distributions/conda/local-conda-example-build.yaml
+$ cat ./llama_stack/configs/distributions/conda/local-conda-example-build.yaml

 name: 8b-instruct
 distribution_spec:
   distribution_type: local
-  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
   docker_image: null
   providers:
     inference: meta-reference
@@ -132,7 +132,7 @@ After this step is complete, a file named `8b-instruct-build.yaml` will be gener
 To specify a different API provider, we can change the `distribution_spec` in our `<name>-build.yaml` config. For example, the following build spec allows you to build a distribution using TGI as the inference API provider.

 ```
-$ cat ./llama_toolchain/configs/distributions/conda/local-tgi-conda-example-build.yaml
+$ cat ./llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml

 name: local-tgi-conda-example
 distribution_spec:
@@ -149,7 +149,7 @@ image_type: conda

 The following command allows you to build a distribution with TGI as the inference API provider, with the name `tgi`.
 ```
-llama stack build --config ./llama_toolchain/configs/distributions/conda/local-tgi-conda-example-build.yaml --name tgi
+llama stack build --config ./llama_stack/configs/distributions/conda/local-tgi-conda-example-build.yaml --name tgi
 ```

 We provide some example build configs to help you get started with building with different API providers.
@@ -158,11 +158,11 @@ We provide some example build configs to help you get started with building with
 To build a docker image, simply change the `image_type` to `docker` in our `<name>-build.yaml` file, and run `llama stack build --config <name>-build.yaml`.

 ```
-$ cat ./llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml
+$ cat ./llama_stack/configs/distributions/docker/local-docker-example-build.yaml

 name: local-docker-example
 distribution_spec:
-  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
   docker_image: null
   providers:
     inference: meta-reference
@@ -175,7 +175,7 @@ image_type: docker

 The following command allows you to build a Docker image with the name `docker-local`
 ```
-llama stack build --config ./llama_toolchain/configs/distributions/docker/local-docker-example-build.yaml --name docker-local
+llama stack build --config ./llama_stack/configs/distributions/docker/local-docker-example-build.yaml --name docker-local

 Dockerfile created successfully in /tmp/tmp.I0ifS2c46A/DockerfileFROM python:3.10-slim
 WORKDIR /app
@@ -296,7 +296,7 @@ Once the server is setup, we can test it with a client to see the example output
 cd /path/to/llama-stack
 conda activate <env> # any environment containing the llama-toolchain pip package will work

-python -m llama_toolchain.inference.client localhost 5000
+python -m llama_stack.inference.client localhost 5000
 ```

 This will run the chat completion client and query the distribution’s /inference/chat_completion API.
@@ -314,7 +314,7 @@ You know what's even more hilarious? People like you who think they can just Goo
 Similarly you can test safety (if you configured llama-guard and/or prompt-guard shields) by:

 ```
-python -m llama_toolchain.safety.client localhost 5000
+python -m llama_stack.safety.client localhost 5000
 ```

 You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/sdk_examples) repo.
@@ -14,10 +14,10 @@ from pydantic import BaseModel, ConfigDict, Field
 from typing_extensions import Annotated

 from llama_models.llama3.api.datatypes import * # noqa: F403
-from llama_toolchain.common.deployment_types import * # noqa: F403
-from llama_toolchain.inference.api import * # noqa: F403
-from llama_toolchain.safety.api import * # noqa: F403
-from llama_toolchain.memory.api import * # noqa: F403
+from llama_stack.common.deployment_types import * # noqa: F403
+from llama_stack.inference.api import * # noqa: F403
+from llama_stack.safety.api import * # noqa: F403
+from llama_stack.memory.api import * # noqa: F403


 @json_schema_type
@@ -16,7 +16,7 @@ from pydantic import BaseModel
 from termcolor import cprint

 from llama_models.llama3.api.datatypes import * # noqa: F403
-from llama_toolchain.core.datatypes import RemoteProviderConfig
+from llama_stack.core.datatypes import RemoteProviderConfig

 from .api import * # noqa: F403
 from .event_logger import EventLogger
@@ -186,7 +186,7 @@ async def run_rag(host: str, port: int):
     ]

     # Alternatively, you can pre-populate the memory bank with documents for example,
-    # using `llama_toolchain.memory.client`. Then you can grab the bank_id
+    # using `llama_stack.memory.client`. Then you can grab the bank_id
     # from the output of that run.
     tool_definitions = [
         MemoryToolDefinition(
@@ -11,10 +11,7 @@ from llama_models.llama3.api.tool_utils import ToolUtils

 from termcolor import cprint

-from llama_toolchain.agentic_system.api import (
-    AgenticSystemTurnResponseEventType,
-    StepType,
-)
+from llama_stack.agentic_system.api import AgenticSystemTurnResponseEventType, StepType


 class LogEvent:
@@ -7,14 +7,14 @@
 from typing import AsyncGenerator, List

 from llama_models.llama3.api.datatypes import * # noqa: F403
-from llama_toolchain.agentic_system.api import * # noqa: F403
-from llama_toolchain.memory.api import * # noqa: F403
-from llama_toolchain.safety.api import * # noqa: F403
+from llama_stack.agentic_system.api import * # noqa: F403
+from llama_stack.memory.api import * # noqa: F403
+from llama_stack.safety.api import * # noqa: F403

-from llama_toolchain.agentic_system.api import (
+from llama_stack.agentic_system.api import (
     AgenticSystemTurnResponseEventType as EventType,
 )
-from llama_toolchain.tools.custom.datatypes import CustomTool
+from llama_stack.tools.custom.datatypes import CustomTool


 class AgentWithCustomToolExecutor:
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
|
|
||||||
from llama_toolchain.core.datatypes import Api, ProviderSpec
|
from llama_stack.core.datatypes import Api, ProviderSpec
|
||||||
|
|
||||||
from .config import MetaReferenceImplConfig
|
from .config import MetaReferenceImplConfig
|
||||||
|
|
|
@ -20,13 +20,13 @@ import httpx
|
||||||
|
|
||||||
from termcolor import cprint
|
from termcolor import cprint
|
||||||
|
|
||||||
from llama_toolchain.agentic_system.api import * # noqa: F403
|
from llama_stack.agentic_system.api import * # noqa: F403
|
||||||
from llama_toolchain.inference.api import * # noqa: F403
|
from llama_stack.inference.api import * # noqa: F403
|
||||||
from llama_toolchain.memory.api import * # noqa: F403
|
from llama_stack.memory.api import * # noqa: F403
|
||||||
from llama_toolchain.safety.api import * # noqa: F403
|
from llama_stack.safety.api import * # noqa: F403
|
||||||
|
|
||||||
from llama_toolchain.tools.base import BaseTool
|
from llama_stack.tools.base import BaseTool
|
||||||
from llama_toolchain.tools.builtin import (
|
from llama_stack.tools.builtin import (
|
||||||
interpret_content_as_attachment,
|
interpret_content_as_attachment,
|
||||||
SingleMessageBuiltinTool,
|
SingleMessageBuiltinTool,
|
||||||
)
|
)
|
|
@ -10,17 +10,17 @@ import tempfile
|
||||||
import uuid
|
import uuid
|
||||||
from typing import AsyncGenerator
|
from typing import AsyncGenerator
|
||||||
|
|
||||||
from llama_toolchain.inference.api import Inference
|
from llama_stack.inference.api import Inference
|
||||||
from llama_toolchain.memory.api import Memory
|
from llama_stack.memory.api import Memory
|
||||||
from llama_toolchain.safety.api import Safety
|
from llama_stack.safety.api import Safety
|
||||||
from llama_toolchain.agentic_system.api import * # noqa: F403
|
from llama_stack.agentic_system.api import * # noqa: F403
|
||||||
from llama_toolchain.tools.builtin import (
|
from llama_stack.tools.builtin import (
|
||||||
CodeInterpreterTool,
|
CodeInterpreterTool,
|
||||||
PhotogenTool,
|
PhotogenTool,
|
||||||
SearchTool,
|
SearchTool,
|
||||||
WolframAlphaTool,
|
WolframAlphaTool,
|
||||||
)
|
)
|
||||||
from llama_toolchain.tools.safety import with_safety
|
from llama_stack.tools.safety import with_safety
|
||||||
|
|
||||||
from .agent_instance import ChatAgent
|
from .agent_instance import ChatAgent
|
||||||
from .config import MetaReferenceImplConfig
|
from .config import MetaReferenceImplConfig
|
|
@ -10,14 +10,14 @@ from jinja2 import Template
|
||||||
from llama_models.llama3.api import * # noqa: F403
|
from llama_models.llama3.api import * # noqa: F403
|
||||||
|
|
||||||
|
|
||||||
from llama_toolchain.agentic_system.api import (
|
from llama_stack.agentic_system.api import (
|
||||||
DefaultMemoryQueryGeneratorConfig,
|
DefaultMemoryQueryGeneratorConfig,
|
||||||
LLMMemoryQueryGeneratorConfig,
|
LLMMemoryQueryGeneratorConfig,
|
||||||
MemoryQueryGenerator,
|
MemoryQueryGenerator,
|
||||||
MemoryQueryGeneratorConfig,
|
MemoryQueryGeneratorConfig,
|
||||||
)
|
)
|
||||||
from termcolor import cprint # noqa: F401
|
from termcolor import cprint # noqa: F401
|
||||||
from llama_toolchain.inference.api import * # noqa: F403
|
from llama_stack.inference.api import * # noqa: F403
|
||||||
|
|
||||||
|
|
||||||
async def generate_rag_query(
|
async def generate_rag_query(
|
|
@ -9,7 +9,7 @@ from typing import List
|
||||||
from llama_models.llama3.api.datatypes import Message, Role, UserMessage
|
from llama_models.llama3.api.datatypes import Message, Role, UserMessage
|
||||||
from termcolor import cprint
|
from termcolor import cprint
|
||||||
|
|
||||||
from llama_toolchain.safety.api import (
|
from llama_stack.safety.api import (
|
||||||
OnViolationAction,
|
OnViolationAction,
|
||||||
RunShieldRequest,
|
RunShieldRequest,
|
||||||
Safety,
|
Safety,
|
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from llama_toolchain.core.datatypes import Api, InlineProviderSpec, ProviderSpec
|
from llama_stack.core.datatypes import Api, InlineProviderSpec, ProviderSpec
|
||||||
|
|
||||||
|
|
||||||
def available_providers() -> List[ProviderSpec]:
|
def available_providers() -> List[ProviderSpec]:
|
||||||
|
@ -23,8 +23,8 @@ def available_providers() -> List[ProviderSpec]:
|
||||||
"torch",
|
"torch",
|
||||||
"transformers",
|
"transformers",
|
||||||
],
|
],
|
||||||
module="llama_toolchain.agentic_system.meta_reference",
|
module="llama_stack.agentic_system.meta_reference",
|
||||||
config_class="llama_toolchain.agentic_system.meta_reference.MetaReferenceImplConfig",
|
config_class="llama_stack.agentic_system.meta_reference.MetaReferenceImplConfig",
|
||||||
api_dependencies=[
|
api_dependencies=[
|
||||||
Api.inference,
|
Api.inference,
|
||||||
Api.safety,
|
Api.safety,
|
|
@ -11,7 +11,7 @@ from llama_models.schema_utils import json_schema_type, webmethod
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from llama_models.llama3.api.datatypes import * # noqa: F403
|
from llama_models.llama3.api.datatypes import * # noqa: F403
|
||||||
from llama_toolchain.inference.api import * # noqa: F403
|
from llama_stack.inference.api import * # noqa: F403
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
|
@ -20,7 +20,7 @@ from pydantic import BaseModel
|
||||||
|
|
||||||
from termcolor import cprint
|
from termcolor import cprint
|
||||||
|
|
||||||
from llama_toolchain.cli.subcommand import Subcommand
|
from llama_stack.cli.subcommand import Subcommand
|
||||||
|
|
||||||
|
|
||||||
class Download(Subcommand):
|
class Download(Subcommand):
|
||||||
|
@ -92,7 +92,7 @@ def _hf_download(
|
||||||
from huggingface_hub import snapshot_download
|
from huggingface_hub import snapshot_download
|
||||||
from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
|
from huggingface_hub.utils import GatedRepoError, RepositoryNotFoundError
|
||||||
|
|
||||||
from llama_toolchain.common.model_utils import model_local_dir
|
from llama_stack.common.model_utils import model_local_dir
|
||||||
|
|
||||||
repo_id = model.huggingface_repo
|
repo_id = model.huggingface_repo
|
||||||
if repo_id is None:
|
if repo_id is None:
|
||||||
|
@ -126,7 +126,7 @@ def _hf_download(
|
||||||
def _meta_download(model: "Model", meta_url: str):
|
def _meta_download(model: "Model", meta_url: str):
|
||||||
from llama_models.sku_list import llama_meta_net_info
|
from llama_models.sku_list import llama_meta_net_info
|
||||||
|
|
||||||
from llama_toolchain.common.model_utils import model_local_dir
|
from llama_stack.common.model_utils import model_local_dir
|
||||||
|
|
||||||
output_dir = Path(model_local_dir(model.descriptor()))
|
output_dir = Path(model_local_dir(model.descriptor()))
|
||||||
os.makedirs(output_dir, exist_ok=True)
|
os.makedirs(output_dir, exist_ok=True)
|
||||||
|
@ -188,7 +188,7 @@ class Manifest(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
def _download_from_manifest(manifest_file: str):
|
def _download_from_manifest(manifest_file: str):
|
||||||
from llama_toolchain.common.model_utils import model_local_dir
|
from llama_stack.common.model_utils import model_local_dir
|
||||||
|
|
||||||
with open(manifest_file, "r") as f:
|
with open(manifest_file, "r") as f:
|
||||||
d = json.load(f)
|
d = json.load(f)
|
|
@ -9,12 +9,12 @@ import json
|
||||||
|
|
||||||
from llama_models.sku_list import resolve_model
|
from llama_models.sku_list import resolve_model
|
||||||
|
|
||||||
from llama_toolchain.cli.subcommand import Subcommand
|
|
||||||
from llama_toolchain.cli.table import print_table
|
|
||||||
from llama_toolchain.common.serialize import EnumEncoder
|
|
||||||
|
|
||||||
from termcolor import colored
|
from termcolor import colored
|
||||||
|
|
||||||
|
from llama_stack.cli.subcommand import Subcommand
|
||||||
|
from llama_stack.cli.table import print_table
|
||||||
|
from llama_stack.common.serialize import EnumEncoder
|
||||||
|
|
||||||
|
|
||||||
class ModelDescribe(Subcommand):
|
class ModelDescribe(Subcommand):
|
||||||
"""Show details about a model"""
|
"""Show details about a model"""
|
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
from llama_toolchain.cli.subcommand import Subcommand
|
from llama_stack.cli.subcommand import Subcommand
|
||||||
|
|
||||||
|
|
||||||
class ModelDownload(Subcommand):
|
class ModelDownload(Subcommand):
|
||||||
|
@ -19,6 +19,6 @@ class ModelDownload(Subcommand):
|
||||||
formatter_class=argparse.RawTextHelpFormatter,
|
formatter_class=argparse.RawTextHelpFormatter,
|
||||||
)
|
)
|
||||||
|
|
||||||
from llama_toolchain.cli.download import setup_download_parser
|
from llama_stack.cli.download import setup_download_parser
|
||||||
|
|
||||||
setup_download_parser(self.parser)
|
setup_download_parser(self.parser)
|
|
@ -8,8 +8,8 @@ import argparse
|
||||||
|
|
||||||
from llama_models.sku_list import all_registered_models
|
from llama_models.sku_list import all_registered_models
|
||||||
|
|
||||||
from llama_toolchain.cli.subcommand import Subcommand
|
from llama_stack.cli.subcommand import Subcommand
|
||||||
from llama_toolchain.cli.table import print_table
|
from llama_stack.cli.table import print_table
|
||||||
|
|
||||||
|
|
||||||
class ModelList(Subcommand):
|
class ModelList(Subcommand):
|
|
@ -6,12 +6,12 @@
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
from llama_toolchain.cli.model.describe import ModelDescribe
|
from llama_stack.cli.model.describe import ModelDescribe
|
||||||
from llama_toolchain.cli.model.download import ModelDownload
|
from llama_stack.cli.model.download import ModelDownload
|
||||||
from llama_toolchain.cli.model.list import ModelList
|
from llama_stack.cli.model.list import ModelList
|
||||||
from llama_toolchain.cli.model.template import ModelTemplate
|
from llama_stack.cli.model.template import ModelTemplate
|
||||||
|
|
||||||
from llama_toolchain.cli.subcommand import Subcommand
|
from llama_stack.cli.subcommand import Subcommand
|
||||||
|
|
||||||
|
|
||||||
class ModelParser(Subcommand):
|
class ModelParser(Subcommand):
|
|
@ -9,7 +9,7 @@ import textwrap
|
||||||
|
|
||||||
from termcolor import colored
|
from termcolor import colored
|
||||||
|
|
||||||
from llama_toolchain.cli.subcommand import Subcommand
|
from llama_stack.cli.subcommand import Subcommand
|
||||||
|
|
||||||
|
|
||||||
class ModelTemplate(Subcommand):
|
class ModelTemplate(Subcommand):
|
||||||
|
@ -75,7 +75,7 @@ class ModelTemplate(Subcommand):
|
||||||
render_jinja_template,
|
render_jinja_template,
|
||||||
)
|
)
|
||||||
|
|
||||||
from llama_toolchain.cli.table import print_table
|
from llama_stack.cli.table import print_table
|
||||||
|
|
||||||
if args.name:
|
if args.name:
|
||||||
tool_prompt_format = self._prompt_type(args.format)
|
tool_prompt_format = self._prompt_type(args.format)
|
|
@ -6,8 +6,8 @@
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
from llama_toolchain.cli.subcommand import Subcommand
|
from llama_stack.cli.subcommand import Subcommand
|
||||||
from llama_toolchain.core.datatypes import * # noqa: F403
|
from llama_stack.core.datatypes import * # noqa: F403
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
|
@ -29,7 +29,7 @@ class StackBuild(Subcommand):
|
||||||
self.parser.add_argument(
|
self.parser.add_argument(
|
||||||
"config",
|
"config",
|
||||||
type=str,
|
type=str,
|
||||||
help="Path to a config file to use for the build. You may find example configs in llama_toolchain/configs/distributions",
|
help="Path to a config file to use for the build. You may find example configs in llama_stack/configs/distributions",
|
||||||
)
|
)
|
||||||
|
|
||||||
self.parser.add_argument(
|
self.parser.add_argument(
|
||||||
|
@ -44,17 +44,17 @@ class StackBuild(Subcommand):
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from llama_toolchain.common.config_dirs import DISTRIBS_BASE_DIR
|
from llama_stack.common.config_dirs import DISTRIBS_BASE_DIR
|
||||||
from llama_toolchain.common.serialize import EnumEncoder
|
from llama_stack.common.serialize import EnumEncoder
|
||||||
from llama_toolchain.core.package import ApiInput, build_image, ImageType
|
from llama_stack.core.package import ApiInput, build_image, ImageType
|
||||||
from termcolor import cprint
|
from termcolor import cprint
|
||||||
|
|
||||||
# save build.yaml spec for building same distribution again
|
# save build.yaml spec for building same distribution again
|
||||||
if build_config.image_type == ImageType.docker.value:
|
if build_config.image_type == ImageType.docker.value:
|
||||||
# docker needs build file to be in the llama-stack repo dir to be able to copy over to the image
|
# docker needs build file to be in the llama-stack repo dir to be able to copy over to the image
|
||||||
llama_toolchain_path = Path(os.path.relpath(__file__)).parent.parent.parent
|
llama_stack_path = Path(os.path.relpath(__file__)).parent.parent.parent
|
||||||
build_dir = (
|
build_dir = (
|
||||||
llama_toolchain_path / "configs/distributions" / build_config.image_type
|
llama_stack_path / "configs/distributions" / build_config.image_type
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
build_dir = DISTRIBS_BASE_DIR / build_config.image_type
|
build_dir = DISTRIBS_BASE_DIR / build_config.image_type
|
||||||
|
@ -74,12 +74,12 @@ class StackBuild(Subcommand):
|
||||||
)
|
)
|
||||||
|
|
||||||
def _run_stack_build_command(self, args: argparse.Namespace) -> None:
|
def _run_stack_build_command(self, args: argparse.Namespace) -> None:
|
||||||
from llama_toolchain.common.prompt_for_config import prompt_for_config
|
from llama_stack.common.prompt_for_config import prompt_for_config
|
||||||
from llama_toolchain.core.dynamic import instantiate_class_type
|
from llama_stack.core.dynamic import instantiate_class_type
|
||||||
|
|
||||||
if not args.config:
|
if not args.config:
|
||||||
self.parser.error(
|
self.parser.error(
|
||||||
"No config file specified. Please use `llama stack build /path/to/*-build.yaml`. Example config files can be found in llama_toolchain/configs/distributions"
|
"No config file specified. Please use `llama stack build /path/to/*-build.yaml`. Example config files can be found in llama_stack/configs/distributions"
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
|
@ -13,11 +13,11 @@ import pkg_resources
|
||||||
import yaml
|
import yaml
|
||||||
from termcolor import cprint
|
from termcolor import cprint
|
||||||
|
|
||||||
from llama_toolchain.cli.subcommand import Subcommand
|
from llama_stack.cli.subcommand import Subcommand
|
||||||
from llama_toolchain.common.config_dirs import BUILDS_BASE_DIR
|
from llama_stack.common.config_dirs import BUILDS_BASE_DIR
|
||||||
|
|
||||||
from llama_toolchain.common.exec import run_with_pty
|
from llama_stack.common.exec import run_with_pty
|
||||||
from llama_toolchain.core.datatypes import * # noqa: F403
|
from llama_stack.core.datatypes import * # noqa: F403
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
@ -49,7 +49,7 @@ class StackConfigure(Subcommand):
|
||||||
)
|
)
|
||||||
|
|
||||||
def _run_stack_configure_cmd(self, args: argparse.Namespace) -> None:
|
def _run_stack_configure_cmd(self, args: argparse.Namespace) -> None:
|
||||||
from llama_toolchain.core.package import ImageType
|
from llama_stack.core.package import ImageType
|
||||||
|
|
||||||
docker_image = None
|
docker_image = None
|
||||||
build_config_file = Path(args.config)
|
build_config_file = Path(args.config)
|
||||||
|
@ -66,7 +66,7 @@ class StackConfigure(Subcommand):
|
||||||
os.makedirs(builds_dir, exist_ok=True)
|
os.makedirs(builds_dir, exist_ok=True)
|
||||||
|
|
||||||
script = pkg_resources.resource_filename(
|
script = pkg_resources.resource_filename(
|
||||||
"llama_toolchain", "core/configure_container.sh"
|
"llama_stack", "core/configure_container.sh"
|
||||||
)
|
)
|
||||||
script_args = [script, docker_image, str(builds_dir)]
|
script_args = [script, docker_image, str(builds_dir)]
|
||||||
|
|
||||||
|
@ -95,8 +95,8 @@ class StackConfigure(Subcommand):
|
||||||
build_config: BuildConfig,
|
build_config: BuildConfig,
|
||||||
output_dir: Optional[str] = None,
|
output_dir: Optional[str] = None,
|
||||||
):
|
):
|
||||||
from llama_toolchain.common.serialize import EnumEncoder
|
from llama_stack.common.serialize import EnumEncoder
|
||||||
from llama_toolchain.core.configure import configure_api_providers
|
from llama_stack.core.configure import configure_api_providers
|
||||||
|
|
||||||
builds_dir = BUILDS_BASE_DIR / build_config.image_type
|
builds_dir = BUILDS_BASE_DIR / build_config.image_type
|
||||||
if output_dir:
|
if output_dir:
|
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
from llama_toolchain.cli.subcommand import Subcommand
|
from llama_stack.cli.subcommand import Subcommand
|
||||||
|
|
||||||
|
|
||||||
class StackListApis(Subcommand):
|
class StackListApis(Subcommand):
|
||||||
|
@ -25,8 +25,8 @@ class StackListApis(Subcommand):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def _run_apis_list_cmd(self, args: argparse.Namespace) -> None:
|
def _run_apis_list_cmd(self, args: argparse.Namespace) -> None:
|
||||||
from llama_toolchain.cli.table import print_table
|
from llama_stack.cli.table import print_table
|
||||||
from llama_toolchain.core.distribution import stack_apis
|
from llama_stack.core.distribution import stack_apis
|
||||||
|
|
||||||
# eventually, this should query a registry at llama.meta.com/llamastack/distributions
|
# eventually, this should query a registry at llama.meta.com/llamastack/distributions
|
||||||
headers = [
|
headers = [
|
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
from llama_toolchain.cli.subcommand import Subcommand
|
from llama_stack.cli.subcommand import Subcommand
|
||||||
|
|
||||||
|
|
||||||
class StackListProviders(Subcommand):
|
class StackListProviders(Subcommand):
|
||||||
|
@ -22,7 +22,7 @@ class StackListProviders(Subcommand):
|
||||||
self.parser.set_defaults(func=self._run_providers_list_cmd)
|
self.parser.set_defaults(func=self._run_providers_list_cmd)
|
||||||
|
|
||||||
def _add_arguments(self):
|
def _add_arguments(self):
|
||||||
from llama_toolchain.core.distribution import stack_apis
|
from llama_stack.core.distribution import stack_apis
|
||||||
|
|
||||||
api_values = [a.value for a in stack_apis()]
|
api_values = [a.value for a in stack_apis()]
|
||||||
self.parser.add_argument(
|
self.parser.add_argument(
|
||||||
|
@ -33,8 +33,8 @@ class StackListProviders(Subcommand):
|
||||||
)
|
)
|
||||||
|
|
||||||
def _run_providers_list_cmd(self, args: argparse.Namespace) -> None:
|
def _run_providers_list_cmd(self, args: argparse.Namespace) -> None:
|
||||||
from llama_toolchain.cli.table import print_table
|
from llama_stack.cli.table import print_table
|
||||||
from llama_toolchain.core.distribution import Api, api_providers
|
from llama_stack.core.distribution import Api, api_providers
|
||||||
|
|
||||||
all_providers = api_providers()
|
all_providers = api_providers()
|
||||||
providers_for_api = all_providers[Api(args.api)]
|
providers_for_api = all_providers[Api(args.api)]
|
|
@ -11,8 +11,8 @@ from pathlib import Path
|
||||||
import pkg_resources
|
import pkg_resources
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from llama_toolchain.cli.subcommand import Subcommand
|
from llama_stack.cli.subcommand import Subcommand
|
||||||
from llama_toolchain.core.datatypes import * # noqa: F403
|
from llama_stack.core.datatypes import * # noqa: F403
|
||||||
|
|
||||||
|
|
||||||
class StackRun(Subcommand):
|
class StackRun(Subcommand):
|
||||||
|
@ -47,7 +47,7 @@ class StackRun(Subcommand):
|
||||||
)
|
)
|
||||||
|
|
||||||
def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
|
def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
|
||||||
from llama_toolchain.common.exec import run_with_pty
|
from llama_stack.common.exec import run_with_pty
|
||||||
|
|
||||||
if not args.config:
|
if not args.config:
|
||||||
self.parser.error("Must specify a config file to run")
|
self.parser.error("Must specify a config file to run")
|
||||||
|
@ -67,13 +67,13 @@ class StackRun(Subcommand):
|
||||||
|
|
||||||
if config.docker_image:
|
if config.docker_image:
|
||||||
script = pkg_resources.resource_filename(
|
script = pkg_resources.resource_filename(
|
||||||
"llama_toolchain",
|
"llama_stack",
|
||||||
"core/start_container.sh",
|
"core/start_container.sh",
|
||||||
)
|
)
|
||||||
run_args = [script, config.docker_image]
|
run_args = [script, config.docker_image]
|
||||||
else:
|
else:
|
||||||
script = pkg_resources.resource_filename(
|
script = pkg_resources.resource_filename(
|
||||||
"llama_toolchain",
|
"llama_stack",
|
||||||
"core/start_conda_env.sh",
|
"core/start_conda_env.sh",
|
||||||
)
|
)
|
||||||
run_args = [
|
run_args = [
|
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
from llama_toolchain.cli.subcommand import Subcommand
|
from llama_stack.cli.subcommand import Subcommand
|
||||||
|
|
||||||
from .build import StackBuild
|
from .build import StackBuild
|
||||||
from .configure import StackConfigure
|
from .configure import StackConfigure
|
|
@@ -1,6 +1,6 @@
 name: local-conda-example
 distribution_spec:
-  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
   providers:
     inference: meta-reference
     memory: meta-reference-faiss
@@ -1,6 +1,6 @@
 name: local-docker-example
 distribution_spec:
-  description: Use code from `llama_toolchain` itself to serve all llama stack APIs
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
   providers:
     inference: meta-reference
     memory: meta-reference-faiss
@ -90,7 +90,7 @@ add_to_docker <<EOF
|
||||||
# This would be good in production but for debugging flexibility lets not add it right now
|
# This would be good in production but for debugging flexibility lets not add it right now
|
||||||
# We need a more solid production ready entrypoint.sh anyway
|
# We need a more solid production ready entrypoint.sh anyway
|
||||||
#
|
#
|
||||||
# ENTRYPOINT ["python", "-m", "llama_toolchain.core.server"]
|
# ENTRYPOINT ["python", "-m", "llama_stack.core.server"]
|
||||||
|
|
||||||
EOF
|
EOF
|
||||||
|
|
|
@ -8,12 +8,12 @@ from typing import Any
|
||||||
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from llama_toolchain.core.datatypes import * # noqa: F403
|
from llama_stack.core.datatypes import * # noqa: F403
|
||||||
from termcolor import cprint
|
from termcolor import cprint
|
||||||
|
|
||||||
from llama_toolchain.common.prompt_for_config import prompt_for_config
|
from llama_stack.common.prompt_for_config import prompt_for_config
|
||||||
from llama_toolchain.core.distribution import api_providers, stack_apis
|
from llama_stack.core.distribution import api_providers, stack_apis
|
||||||
from llama_toolchain.core.dynamic import instantiate_class_type
|
from llama_stack.core.dynamic import instantiate_class_type
|
||||||
|
|
||||||
|
|
||||||
# These are hacks so we can re-use the `prompt_for_config` utility
|
# These are hacks so we can re-use the `prompt_for_config` utility
|
|
@ -151,7 +151,7 @@ as being "Llama Stack compatible"
|
||||||
def module(self) -> str:
|
def module(self) -> str:
|
||||||
if self.adapter:
|
if self.adapter:
|
||||||
return self.adapter.module
|
return self.adapter.module
|
||||||
return f"llama_toolchain.{self.api.value}.client"
|
return f"llama_stack.{self.api.value}.client"
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def pip_packages(self) -> List[str]:
|
def pip_packages(self) -> List[str]:
|
||||||
|
@ -167,7 +167,7 @@ def remote_provider_spec(
|
||||||
config_class = (
|
config_class = (
|
||||||
adapter.config_class
|
adapter.config_class
|
||||||
if adapter and adapter.config_class
|
if adapter and adapter.config_class
|
||||||
else "llama_toolchain.core.datatypes.RemoteProviderConfig"
|
else "llama_stack.core.datatypes.RemoteProviderConfig"
|
||||||
)
|
)
|
||||||
provider_id = remote_provider_id(adapter.adapter_id) if adapter else "remote"
|
provider_id = remote_provider_id(adapter.adapter_id) if adapter else "remote"
|
||||||
|
|
|
@ -8,11 +8,11 @@ import importlib
|
||||||
import inspect
|
import inspect
|
||||||
from typing import Dict, List
|
from typing import Dict, List
|
||||||
|
|
||||||
from llama_toolchain.agentic_system.api import AgenticSystem
|
from llama_stack.agentic_system.api import AgenticSystem
|
||||||
from llama_toolchain.inference.api import Inference
|
from llama_stack.inference.api import Inference
|
||||||
from llama_toolchain.memory.api import Memory
|
from llama_stack.memory.api import Memory
|
||||||
from llama_toolchain.safety.api import Safety
|
from llama_stack.safety.api import Safety
|
||||||
from llama_toolchain.telemetry.api import Telemetry
|
from llama_stack.telemetry.api import Telemetry
|
||||||
|
|
||||||
from .datatypes import Api, ApiEndpoint, ProviderSpec, remote_provider_spec
|
from .datatypes import Api, ApiEndpoint, ProviderSpec, remote_provider_spec
|
||||||
|
|
||||||
|
@ -67,7 +67,7 @@ def api_providers() -> Dict[Api, Dict[str, ProviderSpec]]:
|
||||||
ret = {}
|
ret = {}
|
||||||
for api in stack_apis():
|
for api in stack_apis():
|
||||||
name = api.name.lower()
|
name = api.name.lower()
|
||||||
module = importlib.import_module(f"llama_toolchain.{name}.providers")
|
module = importlib.import_module(f"llama_stack.{name}.providers")
|
||||||
ret[api] = {
|
ret[api] = {
|
||||||
"remote": remote_provider_spec(api),
|
"remote": remote_provider_spec(api),
|
||||||
**{a.provider_id: a for a in module.available_providers()},
|
**{a.provider_id: a for a in module.available_providers()},
|
|
@ -7,7 +7,7 @@
|
||||||
import importlib
|
import importlib
|
||||||
from typing import Any, Dict
|
from typing import Any, Dict
|
||||||
|
|
||||||
from llama_toolchain.core.datatypes import * # noqa: F403
|
from llama_stack.core.datatypes import * # noqa: F403
|
||||||
|
|
||||||
|
|
||||||
def instantiate_class_type(fully_qualified_name):
|
def instantiate_class_type(fully_qualified_name):
|
|
@ -12,12 +12,12 @@ from pydantic import BaseModel
|
||||||
|
|
||||||
from termcolor import cprint
|
from termcolor import cprint
|
||||||
|
|
||||||
from llama_toolchain.common.exec import run_with_pty
|
from llama_stack.common.exec import run_with_pty
|
||||||
|
|
||||||
from llama_toolchain.core.datatypes import * # noqa: F403
|
from llama_stack.core.datatypes import * # noqa: F403
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from llama_toolchain.core.distribution import api_providers, SERVER_DEPENDENCIES
|
from llama_stack.core.distribution import api_providers, SERVER_DEPENDENCIES
|
||||||
|
|
||||||
|
|
||||||
class ImageType(Enum):
|
class ImageType(Enum):
|
||||||
|
@ -68,7 +68,7 @@ def build_image(build_config: BuildConfig, build_file_path: Path):
|
||||||
|
|
||||||
if build_config.image_type == ImageType.docker.value:
|
if build_config.image_type == ImageType.docker.value:
|
||||||
script = pkg_resources.resource_filename(
|
script = pkg_resources.resource_filename(
|
||||||
"llama_toolchain", "core/build_container.sh"
|
"llama_stack", "core/build_container.sh"
|
||||||
)
|
)
|
||||||
args = [
|
args = [
|
||||||
script,
|
script,
|
||||||
|
@ -79,7 +79,7 @@ def build_image(build_config: BuildConfig, build_file_path: Path):
|
||||||
]
|
]
|
||||||
else:
|
else:
|
||||||
script = pkg_resources.resource_filename(
|
script = pkg_resources.resource_filename(
|
||||||
"llama_toolchain", "core/build_conda_env.sh"
|
"llama_stack", "core/build_conda_env.sh"
|
||||||
)
|
)
|
||||||
args = [
|
args = [
|
||||||
script,
|
script,
|
|
@ -39,13 +39,13 @@ from pydantic import BaseModel, ValidationError
|
||||||
from termcolor import cprint
|
from termcolor import cprint
|
||||||
from typing_extensions import Annotated
|
from typing_extensions import Annotated
|
||||||
|
|
||||||
from llama_toolchain.telemetry.tracing import (
|
from llama_stack.telemetry.tracing import (
|
||||||
end_trace,
|
end_trace,
|
||||||
setup_logger,
|
setup_logger,
|
||||||
SpanStatus,
|
SpanStatus,
|
||||||
start_trace,
|
start_trace,
|
||||||
)
|
)
|
||||||
from llama_toolchain.core.datatypes import * # noqa: F403
|
from llama_stack.core.datatypes import * # noqa: F403
|
||||||
|
|
||||||
from .distribution import api_endpoints, api_providers
|
from .distribution import api_endpoints, api_providers
|
||||||
from .dynamic import instantiate_provider
|
from .dynamic import instantiate_provider
|
||||||
|
@ -309,7 +309,7 @@ async def resolve_impls(
|
||||||
|
|
||||||
specs[api] = RouterProviderSpec(
|
specs[api] = RouterProviderSpec(
|
||||||
api=api,
|
api=api,
|
||||||
module=f"llama_toolchain.{api.value.lower()}.router",
|
module=f"llama_stack.{api.value.lower()}.router",
|
||||||
api_dependencies=[],
|
api_dependencies=[],
|
||||||
inner_specs=inner_specs,
|
inner_specs=inner_specs,
|
||||||
)
|
)
|
|
@@ -37,6 +37,6 @@ eval "$(conda shell.bash hook)"
 conda deactivate && conda activate "$env_name"

 $CONDA_PREFIX/bin/python \
-  -m llama_toolchain.core.server \
+  -m llama_stack.core.server \
   --yaml_config "$yaml_config" \
   --port "$port" "$@"
@@ -38,6 +38,6 @@ podman run -it \
   -p $port:$port \
   -v "$yaml_config:/app/config.yaml" \
   $docker_image \
-  python -m llama_toolchain.core.server \
+  python -m llama_stack.core.server \
     --yaml_config /app/config.yaml \
     --port $port "$@"
@ -12,8 +12,8 @@ from llama_models.schema_utils import webmethod
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from llama_models.llama3.api.datatypes import * # noqa: F403
|
from llama_models.llama3.api.datatypes import * # noqa: F403
|
||||||
from llama_toolchain.dataset.api import * # noqa: F403
|
from llama_stack.dataset.api import * # noqa: F403
|
||||||
from llama_toolchain.common.training_types import * # noqa: F403
|
from llama_stack.common.training_types import * # noqa: F403
|
||||||
|
|
||||||
|
|
||||||
class TextGenerationMetric(Enum):
|
class TextGenerationMetric(Enum):
|
|
@ -13,8 +13,8 @@ from llama_models.llama3.api.datatypes import Message, StopReason
|
||||||
from llama_models.llama3.api.tokenizer import Tokenizer
|
from llama_models.llama3.api.tokenizer import Tokenizer
|
||||||
from llama_models.sku_list import resolve_model
|
from llama_models.sku_list import resolve_model
|
||||||
|
|
||||||
from llama_toolchain.inference.api import * # noqa: F403
|
from llama_stack.inference.api import * # noqa: F403
|
||||||
from llama_toolchain.inference.prepare_messages import prepare_messages
|
from llama_stack.inference.prepare_messages import prepare_messages
|
||||||
|
|
||||||
from .config import FireworksImplConfig
|
from .config import FireworksImplConfig
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from llama_toolchain.core.datatypes import RemoteProviderConfig
|
from llama_stack.core.datatypes import RemoteProviderConfig
|
||||||
|
|
||||||
|
|
||||||
async def get_adapter_impl(config: RemoteProviderConfig, _deps):
|
async def get_adapter_impl(config: RemoteProviderConfig, _deps):
|
|
@ -14,8 +14,8 @@ from llama_models.llama3.api.tokenizer import Tokenizer
|
||||||
from llama_models.sku_list import resolve_model
|
from llama_models.sku_list import resolve_model
|
||||||
from ollama import AsyncClient
|
from ollama import AsyncClient
|
||||||
|
|
||||||
from llama_toolchain.inference.api import * # noqa: F403
|
from llama_stack.inference.api import * # noqa: F403
|
||||||
from llama_toolchain.inference.prepare_messages import prepare_messages
|
from llama_stack.inference.prepare_messages import prepare_messages
|
||||||
|
|
||||||
# TODO: Eventually this will move to the llama cli model list command
|
# TODO: Eventually this will move to the llama cli model list command
|
||||||
# mapping of Model SKUs to ollama models
|
# mapping of Model SKUs to ollama models
|
|
@ -13,8 +13,8 @@ from huggingface_hub import HfApi, InferenceClient
|
||||||
from llama_models.llama3.api.chat_format import ChatFormat
|
from llama_models.llama3.api.chat_format import ChatFormat
|
||||||
from llama_models.llama3.api.datatypes import StopReason
|
from llama_models.llama3.api.datatypes import StopReason
|
||||||
from llama_models.llama3.api.tokenizer import Tokenizer
|
from llama_models.llama3.api.tokenizer import Tokenizer
|
||||||
from llama_toolchain.inference.api import * # noqa: F403
|
from llama_stack.inference.api import * # noqa: F403
|
||||||
from llama_toolchain.inference.prepare_messages import prepare_messages
|
from llama_stack.inference.prepare_messages import prepare_messages
|
||||||
|
|
||||||
from .config import TGIImplConfig
|
from .config import TGIImplConfig
|
||||||
|
|
|
@ -13,8 +13,8 @@ from llama_models.llama3.api.tokenizer import Tokenizer
|
||||||
from llama_models.sku_list import resolve_model
|
from llama_models.sku_list import resolve_model
|
||||||
from together import Together
|
from together import Together
|
||||||
|
|
||||||
from llama_toolchain.inference.api import * # noqa: F403
|
from llama_stack.inference.api import * # noqa: F403
|
||||||
from llama_toolchain.inference.prepare_messages import prepare_messages
|
from llama_stack.inference.prepare_messages import prepare_messages
|
||||||
|
|
||||||
from .config import TogetherImplConfig
|
from .config import TogetherImplConfig
|
||||||
|
|
|
@ -10,11 +10,11 @@ from typing import Any, AsyncGenerator
|
||||||
|
|
||||||
import fire
|
import fire
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from llama_toolchain.core.datatypes import RemoteProviderConfig
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from termcolor import cprint
|
from termcolor import cprint
|
||||||
|
|
||||||
|
from llama_stack.core.datatypes import RemoteProviderConfig
|
||||||
|
|
||||||
from .api import (
|
from .api import (
|
||||||
ChatCompletionRequest,
|
ChatCompletionRequest,
|
||||||
ChatCompletionResponse,
|
ChatCompletionResponse,
|
|
@ -4,11 +4,12 @@
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from llama_toolchain.inference.api import (
|
from termcolor import cprint
|
||||||
|
|
||||||
|
from llama_stack.inference.api import (
|
||||||
ChatCompletionResponseEventType,
|
ChatCompletionResponseEventType,
|
||||||
ChatCompletionResponseStreamChunk,
|
ChatCompletionResponseStreamChunk,
|
||||||
)
|
)
|
||||||
from termcolor import cprint
|
|
||||||
|
|
||||||
|
|
||||||
class LogEvent:
|
class LogEvent:
|
|
@ -13,7 +13,7 @@ from llama_models.sku_list import all_registered_models, resolve_model
|
||||||
|
|
||||||
from pydantic import BaseModel, Field, field_validator
|
from pydantic import BaseModel, Field, field_validator
|
||||||
|
|
||||||
from llama_toolchain.inference.api import QuantizationConfig
|
from llama_stack.inference.api import QuantizationConfig
|
||||||
|
|
||||||
|
|
||||||
@json_schema_type
|
@json_schema_type
|
|
@ -28,11 +28,11 @@ from llama_models.llama3.api.datatypes import Message, ToolPromptFormat
|
||||||
from llama_models.llama3.api.tokenizer import Tokenizer
|
from llama_models.llama3.api.tokenizer import Tokenizer
|
||||||
from llama_models.llama3.reference_impl.model import Transformer
|
from llama_models.llama3.reference_impl.model import Transformer
|
||||||
from llama_models.sku_list import resolve_model
|
from llama_models.sku_list import resolve_model
|
||||||
|
|
||||||
from llama_toolchain.common.model_utils import model_local_dir
|
|
||||||
from llama_toolchain.inference.api import QuantizationType
|
|
||||||
from termcolor import cprint
|
from termcolor import cprint
|
||||||
|
|
||||||
|
from llama_stack.common.model_utils import model_local_dir
|
||||||
|
from llama_stack.inference.api import QuantizationType
|
||||||
|
|
||||||
from .config import MetaReferenceImplConfig
|
from .config import MetaReferenceImplConfig
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,7 @@ from typing import AsyncIterator, Union
|
||||||
from llama_models.llama3.api.datatypes import StopReason
|
from llama_models.llama3.api.datatypes import StopReason
|
||||||
from llama_models.sku_list import resolve_model
|
from llama_models.sku_list import resolve_model
|
||||||
|
|
||||||
from llama_toolchain.inference.api import (
|
from llama_stack.inference.api import (
|
||||||
ChatCompletionRequest,
|
ChatCompletionRequest,
|
||||||
ChatCompletionResponse,
|
ChatCompletionResponse,
|
||||||
ChatCompletionResponseEvent,
|
ChatCompletionResponseEvent,
|
||||||
|
@ -21,13 +21,13 @@ from llama_toolchain.inference.api import (
|
||||||
ToolCallDelta,
|
ToolCallDelta,
|
||||||
ToolCallParseStatus,
|
ToolCallParseStatus,
|
||||||
)
|
)
|
||||||
from llama_toolchain.inference.prepare_messages import prepare_messages
|
from llama_stack.inference.prepare_messages import prepare_messages
|
||||||
|
|
||||||
from .config import MetaReferenceImplConfig
|
from .config import MetaReferenceImplConfig
|
||||||
from .model_parallel import LlamaModelParallelGenerator
|
from .model_parallel import LlamaModelParallelGenerator
|
||||||
|
|
||||||
from llama_models.llama3.api.datatypes import * # noqa: F403
|
from llama_models.llama3.api.datatypes import * # noqa: F403
|
||||||
from llama_toolchain.inference.api import * # noqa: F403
|
from llama_stack.inference.api import * # noqa: F403
|
||||||
|
|
||||||
# there's a single model parallel process running serving the model. for now,
|
# there's a single model parallel process running serving the model. for now,
|
||||||
# we don't support multiple concurrent requests to this process.
|
# we don't support multiple concurrent requests to this process.
|
|
@ -5,7 +5,7 @@
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
|
||||||
from llama_models.llama3.api.datatypes import * # noqa: F403
|
from llama_models.llama3.api.datatypes import * # noqa: F403
|
||||||
from llama_toolchain.inference.api import * # noqa: F403
|
from llama_stack.inference.api import * # noqa: F403
|
||||||
from llama_models.llama3.prompt_templates import (
|
from llama_models.llama3.prompt_templates import (
|
||||||
BuiltinToolGenerator,
|
BuiltinToolGenerator,
|
||||||
FunctionTagCustomToolGenerator,
|
FunctionTagCustomToolGenerator,
|
|
@ -6,7 +6,7 @@
|
||||||
|
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from llama_toolchain.core.datatypes import * # noqa: F403
|
from llama_stack.core.datatypes import * # noqa: F403
|
||||||
|
|
||||||
|
|
||||||
def available_providers() -> List[ProviderSpec]:
|
def available_providers() -> List[ProviderSpec]:
|
||||||
|
@ -24,15 +24,15 @@ def available_providers() -> List[ProviderSpec]:
|
||||||
"transformers",
|
"transformers",
|
||||||
"zmq",
|
"zmq",
|
||||||
],
|
],
|
||||||
module="llama_toolchain.inference.meta_reference",
|
module="llama_stack.inference.meta_reference",
|
||||||
config_class="llama_toolchain.inference.meta_reference.MetaReferenceImplConfig",
|
config_class="llama_stack.inference.meta_reference.MetaReferenceImplConfig",
|
||||||
),
|
),
|
||||||
remote_provider_spec(
|
remote_provider_spec(
|
||||||
api=Api.inference,
|
api=Api.inference,
|
||||||
adapter=AdapterSpec(
|
adapter=AdapterSpec(
|
||||||
adapter_id="ollama",
|
adapter_id="ollama",
|
||||||
pip_packages=["ollama"],
|
pip_packages=["ollama"],
|
||||||
module="llama_toolchain.inference.adapters.ollama",
|
module="llama_stack.inference.adapters.ollama",
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
remote_provider_spec(
|
remote_provider_spec(
|
||||||
|
@ -40,8 +40,8 @@ def available_providers() -> List[ProviderSpec]:
|
||||||
adapter=AdapterSpec(
|
adapter=AdapterSpec(
|
||||||
adapter_id="tgi",
|
adapter_id="tgi",
|
||||||
pip_packages=["huggingface_hub"],
|
pip_packages=["huggingface_hub"],
|
||||||
module="llama_toolchain.inference.adapters.tgi",
|
module="llama_stack.inference.adapters.tgi",
|
||||||
config_class="llama_toolchain.inference.adapters.tgi.TGIImplConfig",
|
config_class="llama_stack.inference.adapters.tgi.TGIImplConfig",
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
remote_provider_spec(
|
remote_provider_spec(
|
||||||
|
@ -51,8 +51,8 @@ def available_providers() -> List[ProviderSpec]:
|
||||||
pip_packages=[
|
pip_packages=[
|
||||||
"fireworks-ai",
|
"fireworks-ai",
|
||||||
],
|
],
|
||||||
module="llama_toolchain.inference.adapters.fireworks",
|
module="llama_stack.inference.adapters.fireworks",
|
||||||
config_class="llama_toolchain.inference.adapters.fireworks.FireworksImplConfig",
|
config_class="llama_stack.inference.adapters.fireworks.FireworksImplConfig",
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
remote_provider_spec(
|
remote_provider_spec(
|
||||||
|
@ -62,8 +62,8 @@ def available_providers() -> List[ProviderSpec]:
|
||||||
pip_packages=[
|
pip_packages=[
|
||||||
"together",
|
"together",
|
||||||
],
|
],
|
||||||
module="llama_toolchain.inference.adapters.together",
|
module="llama_stack.inference.adapters.together",
|
||||||
config_class="llama_toolchain.inference.adapters.together.TogetherImplConfig",
|
config_class="llama_stack.inference.adapters.together.TogetherImplConfig",
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
]
|
]
|
|
@ -14,9 +14,9 @@ import torch
|
||||||
|
|
||||||
from fairscale.nn.model_parallel.mappings import reduce_from_model_parallel_region
|
from fairscale.nn.model_parallel.mappings import reduce_from_model_parallel_region
|
||||||
from llama_models.llama3.api.model import Transformer, TransformerBlock
|
from llama_models.llama3.api.model import Transformer, TransformerBlock
|
||||||
from llama_toolchain.inference.api import QuantizationType
|
from llama_stack.inference.api import QuantizationType
|
||||||
|
|
||||||
from llama_toolchain.inference.api.config import (
|
from llama_stack.inference.api.config import (
|
||||||
CheckpointQuantizationFormat,
|
CheckpointQuantizationFormat,
|
||||||
MetaReferenceImplConfig,
|
MetaReferenceImplConfig,
|
||||||
)
|
)
|
Some files were not shown because too many files have changed in this diff.