From 210b71b0bac014b37bffe5fcecbc7a911d8a66da Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 3 Oct 2024 11:07:53 -0700 Subject: [PATCH 01/69] fix prompt guard (#177) Several other fixes to configure. Add support for 1b/3b models in ollama. --- docs/cli_reference.md | 10 ++--- llama_stack/apis/models/client.py | 2 +- llama_stack/distribution/build_conda_env.sh | 2 +- llama_stack/distribution/configure.py | 22 ++++++++++- llama_stack/distribution/datatypes.py | 4 +- .../docker/llamastack-local-cpu/run.yaml | 2 +- .../adapters/inference/ollama/__init__.py | 4 ++ .../adapters/inference/ollama/ollama.py | 3 +- .../impls/meta_reference/safety/config.py | 6 +-- .../impls/meta_reference/safety/safety.py | 39 ++++++------------- llama_stack/providers/registry/inference.py | 1 + 11 files changed, 50 insertions(+), 45 deletions(-) diff --git a/docs/cli_reference.md b/docs/cli_reference.md index 3541d0b4e..8e5feeb6b 100644 --- a/docs/cli_reference.md +++ b/docs/cli_reference.md @@ -117,9 +117,9 @@ llama download --source meta --model-id Llama-Guard-3-1B --meta-url META_URL Essentially, the same commands above work, just replace `--source meta` with `--source huggingface`. ```bash -llama download --source huggingface --model-id Meta-Llama3.1-8B-Instruct --hf-token +llama download --source huggingface --model-id Llama3.1-8B-Instruct --hf-token -llama download --source huggingface --model-id Meta-Llama3.1-70B-Instruct --hf-token +llama download --source huggingface --model-id Llama3.1-70B-Instruct --hf-token llama download --source huggingface --model-id Llama-Guard-3-1B --ignore-patterns *original* llama download --source huggingface --model-id Prompt-Guard-86M --ignore-patterns *original* @@ -230,7 +230,7 @@ You will be shown a Markdown formatted description of the model interface and ho - Please see our [Getting Started](getting_started.md) guide for more details on how to build and start a Llama Stack distribution. ### Step 3.1 Build -In the following steps, imagine we'll be working with a `Meta-Llama3.1-8B-Instruct` model. We will name our build `8b-instruct` to help us remember the config. We will start build our distribution (in the form of a Conda environment, or Docker image). In this step, we will specify: +In the following steps, imagine we'll be working with a `Llama3.1-8B-Instruct` model. We will name our build `8b-instruct` to help us remember the config. We will start build our distribution (in the form of a Conda environment, or Docker image). In this step, we will specify: - `name`: the name for our distribution (e.g. `8b-instruct`) - `image_type`: our build image type (`conda | docker`) - `distribution_spec`: our distribution specs for specifying API providers @@ -365,7 +365,7 @@ llama stack configure [ | | $ llama stack configure ~/.llama/distributions/conda/8b-instruct-build.yaml Configuring API: inference (meta-reference) -Enter value for model (existing: Meta-Llama3.1-8B-Instruct) (required): +Enter value for model (existing: Llama3.1-8B-Instruct) (required): Enter value for quantization (optional): Enter value for torch_seed (optional): Enter value for max_seq_len (existing: 4096) (required): @@ -397,7 +397,7 @@ YAML configuration has been written to ~/.llama/builds/conda/8b-instruct-run.yam After this step is successful, you should be able to find a run configuration spec in `~/.llama/builds/conda/8b-instruct-run.yaml` with the following contents. You may edit this file to change the settings. 
As you can see, we did basic configuration above and configured: -- inference to run on model `Meta-Llama3.1-8B-Instruct` (obtained from `llama model list`) +- inference to run on model `Llama3.1-8B-Instruct` (obtained from `llama model list`) - Llama Guard safety shield with model `Llama-Guard-3-1B` - Prompt Guard safety shield with model `Prompt-Guard-86M` diff --git a/llama_stack/apis/models/client.py b/llama_stack/apis/models/client.py index 0c26b1b50..b6fe6be8b 100644 --- a/llama_stack/apis/models/client.py +++ b/llama_stack/apis/models/client.py @@ -56,7 +56,7 @@ async def run_main(host: str, port: int, stream: bool): response = await client.list_models() cprint(f"list_models response={response}", "green") - response = await client.get_model("Meta-Llama3.1-8B-Instruct") + response = await client.get_model("Llama3.1-8B-Instruct") cprint(f"get_model response={response}", "blue") response = await client.get_model("Llama-Guard-3-1B") diff --git a/llama_stack/distribution/build_conda_env.sh b/llama_stack/distribution/build_conda_env.sh index 804e694a6..3d582b715 100755 --- a/llama_stack/distribution/build_conda_env.sh +++ b/llama_stack/distribution/build_conda_env.sh @@ -23,7 +23,7 @@ if [ "$#" -lt 3 ]; then exit 1 fi -special_pip_deps="$3" +special_pip_deps="$4" set -euo pipefail diff --git a/llama_stack/distribution/configure.py b/llama_stack/distribution/configure.py index e03b201ec..d678a2e00 100644 --- a/llama_stack/distribution/configure.py +++ b/llama_stack/distribution/configure.py @@ -6,8 +6,15 @@ from typing import Any -from pydantic import BaseModel +from llama_models.sku_list import ( + llama3_1_family, + llama3_2_family, + llama3_family, + resolve_model, + safety_models, +) +from pydantic import BaseModel from llama_stack.distribution.datatypes import * # noqa: F403 from prompt_toolkit import prompt from prompt_toolkit.validation import Validator @@ -27,6 +34,11 @@ from llama_stack.providers.impls.meta_reference.safety.config import ( ) +ALLOWED_MODELS = ( + llama3_family() + llama3_1_family() + llama3_2_family() + safety_models() +) + + def make_routing_entry_type(config_class: Any): class BaseModelWithConfig(BaseModel): routing_key: str @@ -104,7 +116,13 @@ def configure_api_providers( else: routing_key = prompt( "> Please enter the supported model your provider has for inference: ", - default="Meta-Llama3.1-8B-Instruct", + default="Llama3.1-8B-Instruct", + validator=Validator.from_callable( + lambda x: resolve_model(x) is not None, + error_message="Model must be: {}".format( + [x.descriptor() for x in ALLOWED_MODELS] + ), + ), ) routing_entries.append( RoutableProviderConfig( diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index 2be6ede26..09778a761 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -117,10 +117,10 @@ Provider configurations for each of the APIs provided by this package. description=""" E.g. 
The following is a ProviderRoutingEntry for models: - - routing_key: Meta-Llama3.1-8B-Instruct + - routing_key: Llama3.1-8B-Instruct provider_type: meta-reference config: - model: Meta-Llama3.1-8B-Instruct + model: Llama3.1-8B-Instruct quantization: null torch_seed: null max_seq_len: 4096 diff --git a/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml b/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml index aa5bb916f..f740897f3 100644 --- a/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml +++ b/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml @@ -36,7 +36,7 @@ routing_table: config: host: localhost port: 6000 - routing_key: Meta-Llama3.1-8B-Instruct + routing_key: Llama3.1-8B-Instruct safety: - provider_type: meta-reference config: diff --git a/llama_stack/providers/adapters/inference/ollama/__init__.py b/llama_stack/providers/adapters/inference/ollama/__init__.py index 2a1f7d140..7763af8d1 100644 --- a/llama_stack/providers/adapters/inference/ollama/__init__.py +++ b/llama_stack/providers/adapters/inference/ollama/__init__.py @@ -7,6 +7,10 @@ from llama_stack.distribution.datatypes import RemoteProviderConfig +class OllamaImplConfig(RemoteProviderConfig): + port: int = 11434 + + async def get_adapter_impl(config: RemoteProviderConfig, _deps): from .ollama import OllamaInferenceAdapter diff --git a/llama_stack/providers/adapters/inference/ollama/ollama.py b/llama_stack/providers/adapters/inference/ollama/ollama.py index c4d48af81..bd267a5f8 100644 --- a/llama_stack/providers/adapters/inference/ollama/ollama.py +++ b/llama_stack/providers/adapters/inference/ollama/ollama.py @@ -23,9 +23,10 @@ from llama_stack.providers.utils.inference.routable import RoutableProviderForMo # TODO: Eventually this will move to the llama cli model list command # mapping of Model SKUs to ollama models OLLAMA_SUPPORTED_SKUS = { - # "Llama3.1-8B-Instruct": "llama3.1", "Llama3.1-8B-Instruct": "llama3.1:8b-instruct-fp16", "Llama3.1-70B-Instruct": "llama3.1:70b-instruct-fp16", + "Llama3.2-1B-Instruct": "llama3.2:1b-instruct-fp16", + "Llama3.2-3B-Instruct": "llama3.2:3b-instruct-fp16", } diff --git a/llama_stack/providers/impls/meta_reference/safety/config.py b/llama_stack/providers/impls/meta_reference/safety/config.py index 36428078d..64a39b3c6 100644 --- a/llama_stack/providers/impls/meta_reference/safety/config.py +++ b/llama_stack/providers/impls/meta_reference/safety/config.py @@ -47,10 +47,6 @@ class LlamaGuardShieldConfig(BaseModel): return model -class PromptGuardShieldConfig(BaseModel): - model: str = "Prompt-Guard-86M" - - class SafetyConfig(BaseModel): llama_guard_shield: Optional[LlamaGuardShieldConfig] = None - prompt_guard_shield: Optional[PromptGuardShieldConfig] = None + enable_prompt_guard: Optional[bool] = False diff --git a/llama_stack/providers/impls/meta_reference/safety/safety.py b/llama_stack/providers/impls/meta_reference/safety/safety.py index f02574f19..0ac3b6244 100644 --- a/llama_stack/providers/impls/meta_reference/safety/safety.py +++ b/llama_stack/providers/impls/meta_reference/safety/safety.py @@ -6,8 +6,6 @@ from typing import Any, Dict, List -from llama_models.sku_list import resolve_model - from llama_stack.distribution.utils.model_utils import model_local_dir from llama_stack.apis.inference import * # noqa: F403 from llama_stack.apis.safety import * # noqa: F403 @@ -20,21 +18,9 @@ from llama_stack.providers.impls.meta_reference.safety.shields.base import ( from .config import 
MetaReferenceShieldType, SafetyConfig -from .shields import ( - CodeScannerShield, - InjectionShield, - JailbreakShield, - LlamaGuardShield, - PromptGuardShield, - ShieldBase, -) +from .shields import CodeScannerShield, LlamaGuardShield, ShieldBase - -def resolve_and_get_path(model_name: str) -> str: - model = resolve_model(model_name) - assert model is not None, f"Could not resolve model {model_name}" - model_dir = model_local_dir(model.descriptor()) - return model_dir +PROMPT_GUARD_MODEL = "Prompt-Guard-86M" class MetaReferenceSafetyImpl(Safety, RoutableProvider): @@ -43,9 +29,10 @@ class MetaReferenceSafetyImpl(Safety, RoutableProvider): self.inference_api = deps[Api.inference] async def initialize(self) -> None: - shield_cfg = self.config.prompt_guard_shield - if shield_cfg is not None: - model_dir = resolve_and_get_path(shield_cfg.model) + if self.config.enable_prompt_guard: + from .shields import PromptGuardShield + + model_dir = model_local_dir(PROMPT_GUARD_MODEL) _ = PromptGuardShield.instance(model_dir) async def shutdown(self) -> None: @@ -108,16 +95,14 @@ class MetaReferenceSafetyImpl(Safety, RoutableProvider): disable_output_check=cfg.disable_output_check, ) elif typ == MetaReferenceShieldType.jailbreak_shield: - assert ( - cfg.prompt_guard_shield is not None - ), "Cannot use Jailbreak Shield since Prompt Guard not present in config" - model_dir = resolve_and_get_path(cfg.prompt_guard_shield.model) + from .shields import JailbreakShield + + model_dir = model_local_dir(PROMPT_GUARD_MODEL) return JailbreakShield.instance(model_dir) elif typ == MetaReferenceShieldType.injection_shield: - assert ( - cfg.prompt_guard_shield is not None - ), "Cannot use PromptGuardShield since not present in config" - model_dir = resolve_and_get_path(cfg.prompt_guard_shield.model) + from .shields import InjectionShield + + model_dir = model_local_dir(PROMPT_GUARD_MODEL) return InjectionShield.instance(model_dir) elif typ == MetaReferenceShieldType.code_scanner_guard: return CodeScannerShield.instance() diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 47e142201..6cd97fd73 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -41,6 +41,7 @@ def available_providers() -> List[ProviderSpec]: adapter=AdapterSpec( adapter_type="ollama", pip_packages=["ollama"], + config_class="llama_stack.providers.adapters.inference.ollama.OllamaImplConfig", module="llama_stack.providers.adapters.inference.ollama", ), ), From 06db9213b1f6d351c3e1e415004c282fedf8b304 Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Thu, 3 Oct 2024 14:18:57 -0400 Subject: [PATCH 02/69] inference: Add model option to client (#170) I was running this client for testing purposes and being able to specify which model to use is a convenient addition. This change makes that possible. 
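For example, the client can now be pointed at a specific model (the model name below is only an illustration; any model served by the running stack should work):

```
python -m llama_stack.apis.inference.client localhost 5000 --model Llama3.2-3B-Instruct --stream False
```

If `--model` is not given, the client falls back to `Llama3.1-8B-Instruct` (or `Llama3.2-11B-Vision-Instruct` when `--mm` is set).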
--- llama_stack/apis/inference/client.py | 30 +++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/llama_stack/apis/inference/client.py b/llama_stack/apis/inference/client.py index 92acc3e14..5cfae633c 100644 --- a/llama_stack/apis/inference/client.py +++ b/llama_stack/apis/inference/client.py @@ -6,6 +6,7 @@ import asyncio import json +import sys from typing import Any, AsyncGenerator, List, Optional import fire @@ -100,15 +101,18 @@ class InferenceClient(Inference): print(f"Error with parsing or validation: {e}") -async def run_main(host: str, port: int, stream: bool): +async def run_main(host: str, port: int, stream: bool, model: Optional[str]): client = InferenceClient(f"http://{host}:{port}") + if not model: + model = "Llama3.1-8B-Instruct" + message = UserMessage( content="hello world, write me a 2 sentence poem about the moon" ) cprint(f"User>{message.content}", "green") iterator = client.chat_completion( - model="Llama3.1-8B-Instruct", + model=model, messages=[message], stream=stream, ) @@ -116,9 +120,14 @@ async def run_main(host: str, port: int, stream: bool): log.print() -async def run_mm_main(host: str, port: int, stream: bool, path: str): +async def run_mm_main( + host: str, port: int, stream: bool, path: Optional[str], model: Optional[str] +): client = InferenceClient(f"http://{host}:{port}") + if not model: + model = "Llama3.2-11B-Vision-Instruct" + message = UserMessage( content=[ ImageMedia(image=URL(uri=f"file://{path}")), @@ -127,7 +136,7 @@ async def run_mm_main(host: str, port: int, stream: bool, path: str): ) cprint(f"User>{message.content}", "green") iterator = client.chat_completion( - model="Llama3.2-11B-Vision-Instruct", + model=model, messages=[message], stream=stream, ) @@ -135,11 +144,18 @@ async def run_mm_main(host: str, port: int, stream: bool, path: str): log.print() -def main(host: str, port: int, stream: bool = True, mm: bool = False, file: str = None): +def main( + host: str, + port: int, + stream: bool = True, + mm: bool = False, + file: Optional[str] = None, + model: Optional[str] = None, +): if mm: - asyncio.run(run_mm_main(host, port, stream, file)) + asyncio.run(run_mm_main(host, port, stream, file, model)) else: - asyncio.run(run_main(host, port, stream)) + asyncio.run(run_main(host, port, stream, model)) if __name__ == "__main__": From 62d266f0188014160898b66d3cde33457f5acd64 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Thu, 3 Oct 2024 11:20:54 -0700 Subject: [PATCH 03/69] [CLI] avoid configure twice (#171) * avoid configure twice * cleanup tmp config * update output msg * address comment * update msg * script update --- llama_stack/cli/stack/build.py | 14 ++++++++++---- llama_stack/distribution/build.py | 7 +++++-- llama_stack/distribution/build_container.sh | 11 ++++++++--- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/llama_stack/cli/stack/build.py b/llama_stack/cli/stack/build.py index d502e4c84..95df6a737 100644 --- a/llama_stack/cli/stack/build.py +++ b/llama_stack/cli/stack/build.py @@ -137,10 +137,16 @@ class StackBuild(Subcommand): if build_config.image_type == "conda" else (f"llamastack-{build_config.name}") ) - cprint( - f"You can now run `llama stack configure {configure_name}`", - color="green", - ) + if build_config.image_type == "conda": + cprint( + f"You can now run `llama stack configure {configure_name}`", + color="green", + ) + else: + cprint( + f"You can now run `llama stack run {build_config.name}`", + color="green", + ) def _run_template_list_cmd(self, args: 
argparse.Namespace) -> None: import json diff --git a/llama_stack/distribution/build.py b/llama_stack/distribution/build.py index fe778bdb8..56186a5aa 100644 --- a/llama_stack/distribution/build.py +++ b/llama_stack/distribution/build.py @@ -8,15 +8,17 @@ from enum import Enum from typing import List, Optional import pkg_resources + +from llama_stack.distribution.utils.exec import run_with_pty from pydantic import BaseModel from termcolor import cprint -from llama_stack.distribution.utils.exec import run_with_pty - from llama_stack.distribution.datatypes import * # noqa: F403 from pathlib import Path +from llama_stack.distribution.distribution import api_providers, SERVER_DEPENDENCIES +from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR from llama_stack.distribution.distribution import get_provider_registry @@ -95,6 +97,7 @@ def build_image(build_config: BuildConfig, build_file_path: Path): build_config.name, package_deps.docker_image, str(build_file_path), + str(BUILDS_BASE_DIR / ImageType.docker.value), " ".join(deps), ] else: diff --git a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh index 625c8cfc3..056a7c06c 100755 --- a/llama_stack/distribution/build_container.sh +++ b/llama_stack/distribution/build_container.sh @@ -10,7 +10,7 @@ if [ "$#" -lt 4 ]; then exit 1 fi -special_pip_deps="$5" +special_pip_deps="$6" set -euo pipefail @@ -18,7 +18,8 @@ build_name="$1" image_name="llamastack-$build_name" docker_base=$2 build_file_path=$3 -pip_dependencies=$4 +host_build_dir=$4 +pip_dependencies=$5 # Define color codes RED='\033[0;31m' @@ -33,7 +34,8 @@ REPO_CONFIGS_DIR="$REPO_DIR/tmp/configs" TEMP_DIR=$(mktemp -d) -llama stack configure $build_file_path --output-dir $REPO_CONFIGS_DIR +llama stack configure $build_file_path +cp $host_build_dir/$build_name-run.yaml $REPO_CONFIGS_DIR add_to_docker() { local input @@ -132,6 +134,9 @@ fi set -x $DOCKER_BINARY build $DOCKER_OPTS -t $image_name -f "$TEMP_DIR/Dockerfile" "$REPO_DIR" $mounts + +# clean up tmp/configs +rm -rf $REPO_CONFIGS_DIR set +x echo "Success! 
You can run it with: $DOCKER_BINARY $DOCKER_OPTS run -p 5000:5000 $image_name" From 7f4931582228526da7b168d14966c0cd6e58cefa Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 3 Oct 2024 11:25:58 -0700 Subject: [PATCH 04/69] Kill a derpy import --- llama_stack/distribution/build.py | 1 - 1 file changed, 1 deletion(-) diff --git a/llama_stack/distribution/build.py b/llama_stack/distribution/build.py index 56186a5aa..13c545723 100644 --- a/llama_stack/distribution/build.py +++ b/llama_stack/distribution/build.py @@ -17,7 +17,6 @@ from termcolor import cprint from llama_stack.distribution.datatypes import * # noqa: F403 from pathlib import Path -from llama_stack.distribution.distribution import api_providers, SERVER_DEPENDENCIES from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR from llama_stack.distribution.distribution import get_provider_registry From 8d41e6caa90c0e7bb44ec1755302a943bdf654f1 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 3 Oct 2024 11:31:03 -0700 Subject: [PATCH 05/69] Bump version to 0.0.39 --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index df3221371..ea6074703 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ blobfile fire httpx huggingface-hub -llama-models>=0.0.38 +llama-models>=0.0.39 prompt-toolkit python-dotenv pydantic>=2 diff --git a/setup.py b/setup.py index 804c9ba3d..06508150c 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ def read_requirements(): setup( name="llama_stack", - version="0.0.38", + version="0.0.39", author="Meta Llama", author_email="llama-oss@meta.com", description="Llama Stack", From f913b57397fb3a34d831aa408440998258a31d0e Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 3 Oct 2024 14:40:21 -0700 Subject: [PATCH 06/69] fix fp8 imports --- .../impls/meta_reference/inference/quantization/loader.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llama_stack/providers/impls/meta_reference/inference/quantization/loader.py b/llama_stack/providers/impls/meta_reference/inference/quantization/loader.py index 9c5182ead..1df86cb84 100644 --- a/llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +++ b/llama_stack/providers/impls/meta_reference/inference/quantization/loader.py @@ -13,15 +13,15 @@ from typing import Optional import torch from fairscale.nn.model_parallel.mappings import reduce_from_model_parallel_region -from llama_models.llama3.api.model import Transformer, TransformerBlock +from llama_models.datatypes import CheckpointQuantizationFormat +from llama_models.llama3.reference_impl.model import Transformer, TransformerBlock from termcolor import cprint from torch import Tensor from llama_stack.apis.inference import QuantizationType -from llama_stack.apis.inference.config import ( - CheckpointQuantizationFormat, +from llama_stack.providers.impls.meta_reference.inference.config import ( MetaReferenceImplConfig, ) From 734f59d3b84091a9f21396eb404f050fe36e9232 Mon Sep 17 00:00:00 2001 From: AshleyT3 Date: Thu, 3 Oct 2024 23:24:47 -0700 Subject: [PATCH 07/69] Check that the model is found before use. 
(#182) --- llama_stack/cli/download.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llama_stack/cli/download.py b/llama_stack/cli/download.py index 658ed40e8..4d0966bb2 100644 --- a/llama_stack/cli/download.py +++ b/llama_stack/cli/download.py @@ -158,12 +158,11 @@ def run_download_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser): info = prompt_guard_download_info() else: model = resolve_model(args.model_id) + if model is None: + parser.error(f"Model {args.model_id} not found") + return info = llama_meta_net_info(model) - if model is None: - parser.error(f"Model {args.model_id} not found") - return - if args.source == "huggingface": _hf_download(model, args.hf_token, args.ignore_patterns, parser) else: From 00ed9a410b405cf489d2a2dd73d896231d30c5d4 Mon Sep 17 00:00:00 2001 From: raghotham Date: Thu, 3 Oct 2024 23:28:43 -0700 Subject: [PATCH 08/69] Update getting_started.md update discord invite link --- docs/getting_started.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index 4e51bc079..e933493c9 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -1,7 +1,7 @@ # llama-stack [![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-stack)](https://pypi.org/project/llama-stack/) -[![Discord](https://img.shields.io/discord/1257833999603335178)](https://discord.gg/TZAAYNVtrU) +[![Discord](https://img.shields.io/discord/1257833999603335178)](https://discord.gg/llama-stack) This repository contains the specifications and implementations of the APIs which are part of the Llama Stack. From 9bf2e354ae4460673f16111ee42f3d091301f4ae Mon Sep 17 00:00:00 2001 From: Dalton Flanagan <6599399+dltn@users.noreply.github.com> Date: Fri, 4 Oct 2024 10:05:59 -0400 Subject: [PATCH 09/69] CLI now requires jq --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index ea6074703..a035cce28 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,7 @@ llama-models>=0.0.39 prompt-toolkit python-dotenv pydantic>=2 +jq requests rich termcolor From 441052b0fde4bd4a3e7b66256473f9275e43e98e Mon Sep 17 00:00:00 2001 From: Dalton Flanagan <6599399+dltn@users.noreply.github.com> Date: Fri, 4 Oct 2024 10:11:43 -0400 Subject: [PATCH 10/69] avoid jq since non-standard on macOS --- llama_stack/cli/stack/configure.py | 4 ++-- requirements.txt | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/llama_stack/cli/stack/configure.py b/llama_stack/cli/stack/configure.py index 7fbfaf769..b8940ea49 100644 --- a/llama_stack/cli/stack/configure.py +++ b/llama_stack/cli/stack/configure.py @@ -72,9 +72,9 @@ class StackConfigure(Subcommand): Path(os.path.expanduser("~/.conda/envs")) / f"llamastack-{args.config}" ) output = subprocess.check_output( - ["bash", "-c", "conda info --json -a | jq '.envs'"] + ["bash", "-c", "conda info --json -a"] ) - conda_envs = json.loads(output.decode("utf-8")) + conda_envs = json.loads(output.decode("utf-8"))["envs"] for x in conda_envs: if x.endswith(f"/llamastack-{args.config}"): diff --git a/requirements.txt b/requirements.txt index a035cce28..ea6074703 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,6 @@ llama-models>=0.0.39 prompt-toolkit python-dotenv pydantic>=2 -jq requests rich termcolor From dc75aab547f8f96750c2ac544a1542d6be161c13 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Fri, 4 Oct 2024 09:30:54 -0700 Subject: [PATCH 11/69] Add setuptools dependency --- 
requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index ea6074703..a4e77bef6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,5 @@ python-dotenv pydantic>=2 requests rich +setuptools termcolor From bfb0e92034e5f344d98473d340a52ee2f021ef05 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Fri, 4 Oct 2024 09:33:43 -0700 Subject: [PATCH 12/69] Bump version to 0.0.40 --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index a4e77bef6..cf63c05f5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ blobfile fire httpx huggingface-hub -llama-models>=0.0.39 +llama-models>=0.0.40 prompt-toolkit python-dotenv pydantic>=2 diff --git a/setup.py b/setup.py index 06508150c..ae1f58015 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ def read_requirements(): setup( name="llama_stack", - version="0.0.39", + version="0.0.40", author="Meta Llama", author_email="llama-oss@meta.com", description="Llama Stack", From 9d16129603dcd91da0756cb796a4d777551d44bb Mon Sep 17 00:00:00 2001 From: Mindaugas Date: Sat, 5 Oct 2024 21:26:26 +0300 Subject: [PATCH 13/69] Add 'url' property to Redis KV config (#192) --- llama_stack/providers/utils/kvstore/config.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llama_stack/providers/utils/kvstore/config.py b/llama_stack/providers/utils/kvstore/config.py index 5893e4c4a..c84212eed 100644 --- a/llama_stack/providers/utils/kvstore/config.py +++ b/llama_stack/providers/utils/kvstore/config.py @@ -31,6 +31,10 @@ class RedisKVStoreConfig(CommonConfig): host: str = "localhost" port: int = 6379 + @property + def url(self) -> str: + return f"redis://{self.host}:{self.port}" + class SqliteKVStoreConfig(CommonConfig): type: Literal[KVStoreType.sqlite.value] = KVStoreType.sqlite.value From 6d4013ac99b72971833c9ad60cb1b58f9261bd3e Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Sat, 5 Oct 2024 12:14:59 -0700 Subject: [PATCH 14/69] Update getting_started.md --- docs/getting_started.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index e933493c9..ef192e90d 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -66,8 +66,13 @@ This guides allows you to quickly get started with building and running a Llama You may also checkout this [notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb) for trying out out demo scripts. ## Quick Cheatsheet -- Quick 3 line command to build and start a LlamaStack server using our Meta Reference implementation for all API endpoints with `conda` as build type. +#### Via docker +``` +docker run -it -p 5000:5000 -v ~/.llama:/root/.llama --gpus=all llamastack-local-gpu +``` + +#### Via conda **`llama stack build`** - You'll be prompted to enter build information interactively. 
``` From 29138a51672e43b79988e5a5ddf866229ce15697 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Sat, 5 Oct 2024 12:28:02 -0700 Subject: [PATCH 15/69] Update getting_started.md --- docs/getting_started.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/getting_started.md b/docs/getting_started.md index ef192e90d..32f4d2d15 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -72,6 +72,10 @@ You may also checkout this [notebook](https://github.com/meta-llama/llama-stack/ docker run -it -p 5000:5000 -v ~/.llama:/root/.llama --gpus=all llamastack-local-gpu ``` +> [!NOTE] +> `~/.llama` should be the path containing downloaded weights of Llama models. + + #### Via conda **`llama stack build`** - You'll be prompted to enter build information interactively. From f73e247ba146a32ad0736176d0da2fad830597b8 Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Sun, 6 Oct 2024 02:34:16 -0400 Subject: [PATCH 16/69] Inline vLLM inference provider (#181) This is just like `local` using `meta-reference` for everything except it uses `vllm` for inference. Docker works, but So far, `conda` is a bit easier to use with the vllm provider. The default container base image does not include all the necessary libraries for all vllm features. More cuda dependencies are necessary. I started changing this base image used in this template, but it also required changes to the Dockerfile, so it was getting too involved to include in the first PR. Working so far: * `python -m llama_stack.apis.inference.client localhost 5000 --model Llama3.2-1B-Instruct --stream True` * `python -m llama_stack.apis.inference.client localhost 5000 --model Llama3.2-1B-Instruct --stream False` Example: ``` $ python -m llama_stack.apis.inference.client localhost 5000 --model Llama3.2-1B-Instruct --stream False User>hello world, write me a 2 sentence poem about the moon Assistant> The moon glows bright in the midnight sky A beacon of light, ``` I have only tested these models: * `Llama3.1-8B-Instruct` - across 4 GPUs (tensor_parallel_size = 4) * `Llama3.2-1B-Instruct` - on a single GPU (tensor_parallel_size = 1) --- .../templates/local-vllm-build.yaml | 10 + llama_stack/providers/impls/vllm/__init__.py | 11 + llama_stack/providers/impls/vllm/config.py | 35 ++ llama_stack/providers/impls/vllm/vllm.py | 356 ++++++++++++++++++ llama_stack/providers/registry/inference.py | 9 + 5 files changed, 421 insertions(+) create mode 100644 llama_stack/distribution/templates/local-vllm-build.yaml create mode 100644 llama_stack/providers/impls/vllm/__init__.py create mode 100644 llama_stack/providers/impls/vllm/config.py create mode 100644 llama_stack/providers/impls/vllm/vllm.py diff --git a/llama_stack/distribution/templates/local-vllm-build.yaml b/llama_stack/distribution/templates/local-vllm-build.yaml new file mode 100644 index 000000000..e907cb7c9 --- /dev/null +++ b/llama_stack/distribution/templates/local-vllm-build.yaml @@ -0,0 +1,10 @@ +name: local-vllm +distribution_spec: + description: Like local, but use vLLM for running LLM inference + providers: + inference: vllm + memory: meta-reference + safety: meta-reference + agents: meta-reference + telemetry: meta-reference +image_type: conda diff --git a/llama_stack/providers/impls/vllm/__init__.py b/llama_stack/providers/impls/vllm/__init__.py new file mode 100644 index 000000000..3d5a81ad9 --- /dev/null +++ b/llama_stack/providers/impls/vllm/__init__.py @@ -0,0 +1,11 @@ +from typing import Any + +from .config import VLLMConfig + + +async def get_provider_impl(config: VLLMConfig, 
_deps) -> Any: + from .vllm import VLLMInferenceImpl + + impl = VLLMInferenceImpl(config) + await impl.initialize() + return impl diff --git a/llama_stack/providers/impls/vllm/config.py b/llama_stack/providers/impls/vllm/config.py new file mode 100644 index 000000000..df2526f2e --- /dev/null +++ b/llama_stack/providers/impls/vllm/config.py @@ -0,0 +1,35 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_models.schema_utils import json_schema_type +from pydantic import BaseModel, Field, field_validator + +from llama_stack.providers.utils.inference import supported_inference_models + + +@json_schema_type +class VLLMConfig(BaseModel): + """Configuration for the vLLM inference provider.""" + + model: str = Field( + default="Llama3.1-8B-Instruct", + description="Model descriptor from `llama model list`", + ) + tensor_parallel_size: int = Field( + default=1, + description="Number of tensor parallel replicas (number of GPUs to use).", + ) + + @field_validator("model") + @classmethod + def validate_model(cls, model: str) -> str: + permitted_models = supported_inference_models() + if model not in permitted_models: + model_list = "\n\t".join(permitted_models) + raise ValueError( + f"Unknown model: `{model}`. Choose from [\n\t{model_list}\n]" + ) + return model diff --git a/llama_stack/providers/impls/vllm/vllm.py b/llama_stack/providers/impls/vllm/vllm.py new file mode 100644 index 000000000..ecaa6bc45 --- /dev/null +++ b/llama_stack/providers/impls/vllm/vllm.py @@ -0,0 +1,356 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +import logging +import os +import uuid +from typing import Any + +from llama_models.llama3.api.chat_format import ChatFormat +from llama_models.llama3.api.datatypes import ( + CompletionMessage, + InterleavedTextMedia, + Message, + StopReason, + ToolChoice, + ToolDefinition, + ToolPromptFormat, +) +from llama_models.llama3.api.tokenizer import Tokenizer + +from vllm.engine.arg_utils import AsyncEngineArgs +from vllm.engine.async_llm_engine import AsyncLLMEngine +from vllm.sampling_params import SamplingParams + +from llama_stack.apis.inference import ChatCompletionRequest, Inference + +from llama_stack.apis.inference.inference import ( + ChatCompletionResponse, + ChatCompletionResponseEvent, + ChatCompletionResponseEventType, + ChatCompletionResponseStreamChunk, + CompletionResponse, + CompletionResponseStreamChunk, + EmbeddingsResponse, + LogProbConfig, + ToolCallDelta, + ToolCallParseStatus, +) +from llama_stack.providers.utils.inference.augment_messages import ( + augment_messages_for_tools, +) +from llama_stack.providers.utils.inference.routable import RoutableProviderForModels + +from .config import VLLMConfig + + +log = logging.getLogger(__name__) + + +def _random_uuid() -> str: + return str(uuid.uuid4().hex) + + +def _vllm_sampling_params(sampling_params: Any) -> SamplingParams: + """Convert sampling params to vLLM sampling params.""" + if sampling_params is None: + return SamplingParams() + + # TODO convert what I saw in my first test ... 
but surely there's more to do here + kwargs = { + "temperature": sampling_params.temperature, + } + if sampling_params.top_k >= 1: + kwargs["top_k"] = sampling_params.top_k + if sampling_params.top_p: + kwargs["top_p"] = sampling_params.top_p + if sampling_params.max_tokens >= 1: + kwargs["max_tokens"] = sampling_params.max_tokens + if sampling_params.repetition_penalty > 0: + kwargs["repetition_penalty"] = sampling_params.repetition_penalty + + return SamplingParams().from_optional(**kwargs) + + +class VLLMInferenceImpl(Inference, RoutableProviderForModels): + """Inference implementation for vLLM.""" + + HF_MODEL_MAPPINGS = { + # TODO: seems like we should be able to build this table dynamically ... + "Llama3.1-8B": "meta-llama/Llama-3.1-8B", + "Llama3.1-70B": "meta-llama/Llama-3.1-70B", + "Llama3.1-405B:bf16-mp8": "meta-llama/Llama-3.1-405B", + "Llama3.1-405B": "meta-llama/Llama-3.1-405B-FP8", + "Llama3.1-405B:bf16-mp16": "meta-llama/Llama-3.1-405B", + "Llama3.1-8B-Instruct": "meta-llama/Llama-3.1-8B-Instruct", + "Llama3.1-70B-Instruct": "meta-llama/Llama-3.1-70B-Instruct", + "Llama3.1-405B-Instruct:bf16-mp8": "meta-llama/Llama-3.1-405B-Instruct", + "Llama3.1-405B-Instruct": "meta-llama/Llama-3.1-405B-Instruct-FP8", + "Llama3.1-405B-Instruct:bf16-mp16": "meta-llama/Llama-3.1-405B-Instruct", + "Llama3.2-1B": "meta-llama/Llama-3.2-1B", + "Llama3.2-3B": "meta-llama/Llama-3.2-3B", + "Llama3.2-11B-Vision": "meta-llama/Llama-3.2-11B-Vision", + "Llama3.2-90B-Vision": "meta-llama/Llama-3.2-90B-Vision", + "Llama3.2-1B-Instruct": "meta-llama/Llama-3.2-1B-Instruct", + "Llama3.2-3B-Instruct": "meta-llama/Llama-3.2-3B-Instruct", + "Llama3.2-11B-Vision-Instruct": "meta-llama/Llama-3.2-11B-Vision-Instruct", + "Llama3.2-90B-Vision-Instruct": "meta-llama/Llama-3.2-90B-Vision-Instruct", + "Llama-Guard-3-11B-Vision": "meta-llama/Llama-Guard-3-11B-Vision", + "Llama-Guard-3-1B:int4-mp1": "meta-llama/Llama-Guard-3-1B-INT4", + "Llama-Guard-3-1B": "meta-llama/Llama-Guard-3-1B", + "Llama-Guard-3-8B": "meta-llama/Llama-Guard-3-8B", + "Llama-Guard-3-8B:int8-mp1": "meta-llama/Llama-Guard-3-8B-INT8", + "Prompt-Guard-86M": "meta-llama/Prompt-Guard-86M", + "Llama-Guard-2-8B": "meta-llama/Llama-Guard-2-8B", + } + + def __init__(self, config: VLLMConfig): + Inference.__init__(self) + RoutableProviderForModels.__init__( + self, + stack_to_provider_models_map=self.HF_MODEL_MAPPINGS, + ) + self.config = config + self.engine = None + + tokenizer = Tokenizer.get_instance() + self.formatter = ChatFormat(tokenizer) + + async def initialize(self): + """Initialize the vLLM inference adapter.""" + + log.info("Initializing vLLM inference adapter") + + # Disable usage stats reporting. This would be a surprising thing for most + # people to find out was on by default. + # https://docs.vllm.ai/en/latest/serving/usage_stats.html + if "VLLM_NO_USAGE_STATS" not in os.environ: + os.environ["VLLM_NO_USAGE_STATS"] = "1" + + hf_model = self.HF_MODEL_MAPPINGS.get(self.config.model) + + # TODO -- there are a ton of options supported here ... 
+ engine_args = AsyncEngineArgs() + engine_args.model = hf_model + # We will need a new config item for this in the future if model support is more broad + # than it is today (llama only) + engine_args.tokenizer = hf_model + engine_args.tensor_parallel_size = self.config.tensor_parallel_size + + self.engine = AsyncLLMEngine.from_engine_args(engine_args) + + async def shutdown(self): + """Shutdown the vLLM inference adapter.""" + log.info("Shutting down vLLM inference adapter") + if self.engine: + self.engine.shutdown_background_loop() + + async def completion( + self, + model: str, + content: InterleavedTextMedia, + sampling_params: Any | None = ..., + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + ) -> CompletionResponse | CompletionResponseStreamChunk: + log.info("vLLM completion") + messages = [Message(role="user", content=content)] + async for result in self.chat_completion( + model=model, + messages=messages, + sampling_params=sampling_params, + stream=stream, + logprobs=logprobs, + ): + yield result + + async def chat_completion( + self, + model: str, + messages: list[Message], + sampling_params: Any | None = ..., + tools: list[ToolDefinition] | None = ..., + tool_choice: ToolChoice | None = ..., + tool_prompt_format: ToolPromptFormat | None = ..., + stream: bool | None = False, + logprobs: LogProbConfig | None = None, + ) -> ChatCompletionResponse | ChatCompletionResponseStreamChunk: + log.info("vLLM chat completion") + + assert self.engine is not None + + request = ChatCompletionRequest( + model=model, + messages=messages, + sampling_params=sampling_params, + tools=tools or [], + tool_choice=tool_choice, + tool_prompt_format=tool_prompt_format, + stream=stream, + logprobs=logprobs, + ) + + log.info("Sampling params: %s", sampling_params) + vllm_sampling_params = _vllm_sampling_params(sampling_params) + + messages = augment_messages_for_tools(request) + log.info("Augmented messages: %s", messages) + prompt = "".join([str(message.content) for message in messages]) + + request_id = _random_uuid() + results_generator = self.engine.generate( + prompt, vllm_sampling_params, request_id + ) + + if not stream: + # Non-streaming case + final_output = None + stop_reason = None + async for request_output in results_generator: + final_output = request_output + if stop_reason is None and request_output.outputs: + reason = request_output.outputs[-1].stop_reason + if reason == "stop": + stop_reason = StopReason.end_of_turn + elif reason == "length": + stop_reason = StopReason.out_of_tokens + + if not stop_reason: + stop_reason = StopReason.end_of_message + + if final_output: + response = "".join([output.text for output in final_output.outputs]) + yield ChatCompletionResponse( + completion_message=CompletionMessage( + content=response, + stop_reason=stop_reason, + ), + logprobs=None, + ) + else: + # Streaming case + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.start, + delta="", + ) + ) + + buffer = "" + last_chunk = "" + ipython = False + stop_reason = None + + async for chunk in results_generator: + if not chunk.outputs: + log.warning("Empty chunk received") + continue + + if chunk.outputs[-1].stop_reason: + reason = chunk.outputs[-1].stop_reason + if stop_reason is None and reason == "stop": + stop_reason = StopReason.end_of_turn + elif stop_reason is None and reason == "length": + stop_reason = StopReason.out_of_tokens + break + + text = "".join([output.text for output in chunk.outputs]) + + # 
check if its a tool call ( aka starts with <|python_tag|> ) + if not ipython and text.startswith("<|python_tag|>"): + ipython = True + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + content="", + parse_status=ToolCallParseStatus.started, + ), + ) + ) + buffer += text + continue + + if ipython: + if text == "<|eot_id|>": + stop_reason = StopReason.end_of_turn + text = "" + continue + elif text == "<|eom_id|>": + stop_reason = StopReason.end_of_message + text = "" + continue + + buffer += text + delta = ToolCallDelta( + content=text, + parse_status=ToolCallParseStatus.in_progress, + ) + + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=delta, + stop_reason=stop_reason, + ) + ) + else: + last_chunk_len = len(last_chunk) + last_chunk = text + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=text[last_chunk_len:], + stop_reason=stop_reason, + ) + ) + + if not stop_reason: + stop_reason = StopReason.end_of_message + + # parse tool calls and report errors + message = self.formatter.decode_assistant_message_from_content( + buffer, stop_reason + ) + parsed_tool_calls = len(message.tool_calls) > 0 + if ipython and not parsed_tool_calls: + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + content="", + parse_status=ToolCallParseStatus.failure, + ), + stop_reason=stop_reason, + ) + ) + + for tool_call in message.tool_calls: + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + content=tool_call, + parse_status=ToolCallParseStatus.success, + ), + stop_reason=stop_reason, + ) + ) + + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.complete, + delta="", + stop_reason=stop_reason, + ) + ) + + async def embeddings( + self, model: str, contents: list[InterleavedTextMedia] + ) -> EmbeddingsResponse: + log.info("vLLM embeddings") + # TODO + raise NotImplementedError() diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 6cd97fd73..9b1dc099d 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -104,4 +104,13 @@ def available_providers() -> List[ProviderSpec]: config_class="llama_stack.providers.adapters.inference.bedrock.BedrockConfig", ), ), + InlineProviderSpec( + api=Api.inference, + provider_type="vllm", + pip_packages=[ + "vllm", + ], + module="llama_stack.providers.impls.vllm", + config_class="llama_stack.providers.impls.vllm.VLLMConfig", + ), ] From 7abab7604b23856631d611dc7a3921136884f4d4 Mon Sep 17 00:00:00 2001 From: Prithu Dasgupta <168488051+prithu-dasgupta@users.noreply.github.com> Date: Sat, 5 Oct 2024 23:35:54 -0700 Subject: [PATCH 17/69] add databricks provider (#83) * add databricks provider * update provider and test --- .../templates/local-databricks-build.yaml | 10 + .../adapters/inference/databricks/__init__.py | 16 ++ .../adapters/inference/databricks/config.py | 22 ++ .../inference/databricks/databricks.py | 257 ++++++++++++++++++ llama_stack/providers/registry/inference.py | 11 + 5 files changed, 316 
insertions(+) create mode 100644 llama_stack/distribution/templates/local-databricks-build.yaml create mode 100644 llama_stack/providers/adapters/inference/databricks/__init__.py create mode 100644 llama_stack/providers/adapters/inference/databricks/config.py create mode 100644 llama_stack/providers/adapters/inference/databricks/databricks.py diff --git a/llama_stack/distribution/templates/local-databricks-build.yaml b/llama_stack/distribution/templates/local-databricks-build.yaml new file mode 100644 index 000000000..754af7668 --- /dev/null +++ b/llama_stack/distribution/templates/local-databricks-build.yaml @@ -0,0 +1,10 @@ +name: local-databricks +distribution_spec: + description: Use Databricks for running LLM inference + providers: + inference: remote::databricks + memory: meta-reference + safety: meta-reference + agents: meta-reference + telemetry: meta-reference +image_type: conda \ No newline at end of file diff --git a/llama_stack/providers/adapters/inference/databricks/__init__.py b/llama_stack/providers/adapters/inference/databricks/__init__.py new file mode 100644 index 000000000..097579d25 --- /dev/null +++ b/llama_stack/providers/adapters/inference/databricks/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .config import DatabricksImplConfig +from .databricks import DatabricksInferenceAdapter + +async def get_adapter_impl(config: DatabricksImplConfig, _deps): + assert isinstance( + config, DatabricksImplConfig + ), f"Unexpected config type: {type(config)}" + impl = DatabricksInferenceAdapter(config) + await impl.initialize() + return impl \ No newline at end of file diff --git a/llama_stack/providers/adapters/inference/databricks/config.py b/llama_stack/providers/adapters/inference/databricks/config.py new file mode 100644 index 000000000..927bb474c --- /dev/null +++ b/llama_stack/providers/adapters/inference/databricks/config.py @@ -0,0 +1,22 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Optional + +from llama_models.schema_utils import json_schema_type +from pydantic import BaseModel, Field + + +@json_schema_type +class DatabricksImplConfig(BaseModel): + url: str = Field( + default=None, + description="The URL for the Databricks model serving endpoint", + ) + api_token: str = Field( + default=None, + description="The Databricks API token", + ) \ No newline at end of file diff --git a/llama_stack/providers/adapters/inference/databricks/databricks.py b/llama_stack/providers/adapters/inference/databricks/databricks.py new file mode 100644 index 000000000..eeffb938d --- /dev/null +++ b/llama_stack/providers/adapters/inference/databricks/databricks.py @@ -0,0 +1,257 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from typing import AsyncGenerator + +from openai import OpenAI + +from llama_models.llama3.api.chat_format import ChatFormat + +from llama_models.llama3.api.datatypes import Message, StopReason +from llama_models.llama3.api.tokenizer import Tokenizer +from llama_models.sku_list import resolve_model + +from llama_stack.apis.inference import * # noqa: F403 +from llama_stack.providers.utils.inference.augment_messages import ( + augment_messages_for_tools, +) + +from .config import DatabricksImplConfig + +DATABRICKS_SUPPORTED_MODELS = { + "Llama3.1-70B-Instruct": "databricks-meta-llama-3-1-70b-instruct", + "Llama3.1-405B-Instruct": "databricks-meta-llama-3-1-405b-instruct", +} + + +class DatabricksInferenceAdapter(Inference): + def __init__(self, config: DatabricksImplConfig) -> None: + self.config = config + tokenizer = Tokenizer.get_instance() + self.formatter = ChatFormat(tokenizer) + + @property + def client(self) -> OpenAI: + return OpenAI( + base_url=self.config.url, + api_key=self.config.api_token + ) + + async def initialize(self) -> None: + return + + async def shutdown(self) -> None: + pass + + async def validate_routing_keys(self, routing_keys: list[str]) -> None: + # these are the model names the Llama Stack will use to route requests to this provider + # perform validation here if necessary + pass + + async def completion(self, request: CompletionRequest) -> AsyncGenerator: + raise NotImplementedError() + + def _messages_to_databricks_messages(self, messages: list[Message]) -> list: + databricks_messages = [] + for message in messages: + if message.role == "ipython": + role = "tool" + else: + role = message.role + databricks_messages.append({"role": role, "content": message.content}) + + return databricks_messages + + def resolve_databricks_model(self, model_name: str) -> str: + model = resolve_model(model_name) + assert ( + model is not None + and model.descriptor(shorten_default_variant=True) + in DATABRICKS_SUPPORTED_MODELS + ), f"Unsupported model: {model_name}, use one of the supported models: {','.join(DATABRICKS_SUPPORTED_MODELS.keys())}" + + return DATABRICKS_SUPPORTED_MODELS.get( + model.descriptor(shorten_default_variant=True) + ) + + def get_databricks_chat_options(self, request: ChatCompletionRequest) -> dict: + options = {} + if request.sampling_params is not None: + for attr in {"temperature", "top_p", "top_k", "max_tokens"}: + if getattr(request.sampling_params, attr): + options[attr] = getattr(request.sampling_params, attr) + + return options + + async def chat_completion( + self, + model: str, + messages: List[Message], + sampling_params: Optional[SamplingParams] = SamplingParams(), + tools: Optional[List[ToolDefinition]] = None, + tool_choice: Optional[ToolChoice] = ToolChoice.auto, + tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json, + stream: Optional[bool] = False, + logprobs: Optional[LogProbConfig] = None, + ) -> AsyncGenerator: + request = ChatCompletionRequest( + model=model, + messages=messages, + sampling_params=sampling_params, + tools=tools or [], + tool_choice=tool_choice, + tool_prompt_format=tool_prompt_format, + stream=stream, + logprobs=logprobs, + ) + + messages = augment_messages_for_tools(request) + options = self.get_databricks_chat_options(request) + databricks_model = self.resolve_databricks_model(request.model) + + if not request.stream: + + r = self.client.chat.completions.create( + model=databricks_model, + messages=self._messages_to_databricks_messages(messages), + stream=False, + **options, + ) + + stop_reason = 
None + if r.choices[0].finish_reason: + if r.choices[0].finish_reason == "stop": + stop_reason = StopReason.end_of_turn + elif r.choices[0].finish_reason == "length": + stop_reason = StopReason.out_of_tokens + + completion_message = self.formatter.decode_assistant_message_from_content( + r.choices[0].message.content, stop_reason + ) + yield ChatCompletionResponse( + completion_message=completion_message, + logprobs=None, + ) + else: + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.start, + delta="", + ) + ) + + buffer = "" + ipython = False + stop_reason = None + + for chunk in self.client.chat.completions.create( + model=databricks_model, + messages=self._messages_to_databricks_messages(messages), + stream=True, + **options, + ): + if chunk.choices[0].finish_reason: + if ( + stop_reason is None + and chunk.choices[0].finish_reason == "stop" + ): + stop_reason = StopReason.end_of_turn + elif ( + stop_reason is None + and chunk.choices[0].finish_reason == "length" + ): + stop_reason = StopReason.out_of_tokens + break + + text = chunk.choices[0].delta.content + + if text is None: + continue + + # check if its a tool call ( aka starts with <|python_tag|> ) + if not ipython and text.startswith("<|python_tag|>"): + ipython = True + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + content="", + parse_status=ToolCallParseStatus.started, + ), + ) + ) + buffer += text + continue + + if ipython: + if text == "<|eot_id|>": + stop_reason = StopReason.end_of_turn + text = "" + continue + elif text == "<|eom_id|>": + stop_reason = StopReason.end_of_message + text = "" + continue + + buffer += text + delta = ToolCallDelta( + content=text, + parse_status=ToolCallParseStatus.in_progress, + ) + + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=delta, + stop_reason=stop_reason, + ) + ) + else: + buffer += text + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=text, + stop_reason=stop_reason, + ) + ) + + # parse tool calls and report errors + message = self.formatter.decode_assistant_message_from_content( + buffer, stop_reason + ) + parsed_tool_calls = len(message.tool_calls) > 0 + if ipython and not parsed_tool_calls: + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + content="", + parse_status=ToolCallParseStatus.failure, + ), + stop_reason=stop_reason, + ) + ) + + for tool_call in message.tool_calls: + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + content=tool_call, + parse_status=ToolCallParseStatus.success, + ), + stop_reason=stop_reason, + ) + ) + + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.complete, + delta="", + stop_reason=stop_reason, + ) + ) \ No newline at end of file diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 9b1dc099d..ddfd4ff40 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -104,6 +104,17 @@ def 
available_providers() -> List[ProviderSpec]: config_class="llama_stack.providers.adapters.inference.bedrock.BedrockConfig", ), ), + remote_provider_spec( + api=Api.inference, + adapter=AdapterSpec( + adapter_type="databricks", + pip_packages=[ + "openai", + ], + module="llama_stack.providers.adapters.inference.databricks", + config_class="llama_stack.providers.adapters.inference.databricks.DatabricksImplConfig", + ), + ), InlineProviderSpec( api=Api.inference, provider_type="vllm", From cfe3ad33b30e0afdbd4b2392b6bcf6090e3cb160 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Sun, 6 Oct 2024 11:45:35 -0700 Subject: [PATCH 18/69] fix db path --- .../distribution/templates/docker/llamastack-local-cpu/run.yaml | 2 +- .../distribution/templates/docker/llamastack-local-gpu/run.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml b/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml index f740897f3..9d92b864d 100644 --- a/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml +++ b/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml @@ -23,7 +23,7 @@ api_providers: persistence_store: namespace: null type: sqlite - db_path: /home/xiyan/.llama/runtime/kvstore.db + db_path: ~.llama/runtime/kvstore.db memory: providers: - meta-reference diff --git a/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml b/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml index bb7a2cc0d..0004b1780 100644 --- a/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml +++ b/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml @@ -23,7 +23,7 @@ api_providers: persistence_store: namespace: null type: sqlite - db_path: /home/xiyan/.llama/runtime/kvstore.db + db_path: ~/.llama/runtime/kvstore.db memory: providers: - meta-reference From 27587f32bc99b62c7b5e7ecbd8f4e44e3f9e16b3 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Sun, 6 Oct 2024 11:46:08 -0700 Subject: [PATCH 19/69] fix db path --- .../distribution/templates/docker/llamastack-local-cpu/run.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml b/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml index 9d92b864d..62b615a50 100644 --- a/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml +++ b/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml @@ -23,7 +23,7 @@ api_providers: persistence_store: namespace: null type: sqlite - db_path: ~.llama/runtime/kvstore.db + db_path: ~/.llama/runtime/kvstore.db memory: providers: - meta-reference From f4f7618120c1f8b820c2b4f413842f4ad91638bf Mon Sep 17 00:00:00 2001 From: Zain Hasan Date: Mon, 7 Oct 2024 01:21:50 -0400 Subject: [PATCH 20/69] add Weaviate memory adapter (#95) --- .../adapters/memory/weaviate/__init__.py | 8 + .../adapters/memory/weaviate/config.py | 18 ++ .../adapters/memory/weaviate/weaviate.py | 192 ++++++++++++++++++ llama_stack/providers/registry/memory.py | 9 + 4 files changed, 227 insertions(+) create mode 100644 llama_stack/providers/adapters/memory/weaviate/__init__.py create mode 100644 llama_stack/providers/adapters/memory/weaviate/config.py create mode 100644 llama_stack/providers/adapters/memory/weaviate/weaviate.py diff --git a/llama_stack/providers/adapters/memory/weaviate/__init__.py 
b/llama_stack/providers/adapters/memory/weaviate/__init__.py new file mode 100644 index 000000000..b564eabf4 --- /dev/null +++ b/llama_stack/providers/adapters/memory/weaviate/__init__.py @@ -0,0 +1,8 @@ +from .config import WeaviateConfig + +async def get_adapter_impl(config: WeaviateConfig, _deps): + from .weaviate import WeaviateMemoryAdapter + + impl = WeaviateMemoryAdapter(config) + await impl.initialize() + return impl \ No newline at end of file diff --git a/llama_stack/providers/adapters/memory/weaviate/config.py b/llama_stack/providers/adapters/memory/weaviate/config.py new file mode 100644 index 000000000..db73604d2 --- /dev/null +++ b/llama_stack/providers/adapters/memory/weaviate/config.py @@ -0,0 +1,18 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from llama_models.schema_utils import json_schema_type +from pydantic import BaseModel, Field + +class WeaviateRequestProviderData(BaseModel): + # if there _is_ provider data, it must specify the API KEY + # if you want it to be optional, use Optional[str] + weaviate_api_key: str + weaviate_cluster_url: str + +@json_schema_type +class WeaviateConfig(BaseModel): + collection: str = Field(default="MemoryBank") diff --git a/llama_stack/providers/adapters/memory/weaviate/weaviate.py b/llama_stack/providers/adapters/memory/weaviate/weaviate.py new file mode 100644 index 000000000..abfe27150 --- /dev/null +++ b/llama_stack/providers/adapters/memory/weaviate/weaviate.py @@ -0,0 +1,192 @@ +import json +import uuid +from typing import List, Optional, Dict, Any +from numpy.typing import NDArray + +import weaviate +import weaviate.classes as wvc +from weaviate.classes.init import Auth + +from llama_stack.apis.memory import * +from llama_stack.distribution.request_headers import get_request_provider_data +from llama_stack.providers.utils.memory.vector_store import ( + BankWithIndex, + EmbeddingIndex, +) + +from .config import WeaviateConfig, WeaviateRequestProviderData + +class WeaviateIndex(EmbeddingIndex): + def __init__(self, client: weaviate.Client, collection: str): + self.client = client + self.collection = collection + + async def add_chunks(self, chunks: List[Chunk], embeddings: NDArray): + assert len(chunks) == len(embeddings), f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}" + + data_objects = [] + for i, chunk in enumerate(chunks): + + data_objects.append(wvc.data.DataObject( + properties={ + "chunk_content": chunk, + }, + vector = embeddings[i].tolist() + )) + + # Inserting chunks into a prespecified Weaviate collection + assert self.collection is not None, "Collection name must be specified" + my_collection = self.client.collections.get(self.collection) + + await my_collection.data.insert_many(data_objects) + + + async def query(self, embedding: NDArray, k: int) -> QueryDocumentsResponse: + assert self.collection is not None, "Collection name must be specified" + + my_collection = self.client.collections.get(self.collection) + + results = my_collection.query.near_vector( + near_vector = embedding.tolist(), + limit = k, + return_meta_data = wvc.query.MetadataQuery(distance=True) + ) + + chunks = [] + scores = [] + for doc in results.objects: + try: + chunk = doc.properties["chunk_content"] + chunks.append(chunk) + scores.append(1.0 / doc.metadata.distance) + + except Exception as e: + import traceback + traceback.print_exc() + print(f"Failed 
to parse document: {e}") + + return QueryDocumentsResponse(chunks=chunks, scores=scores) + + +class WeaviateMemoryAdapter(Memory): + def __init__(self, config: WeaviateConfig) -> None: + self.config = config + self.client = None + self.cache = {} + + def _get_client(self) -> weaviate.Client: + request_provider_data = get_request_provider_data() + + if request_provider_data is not None: + assert isinstance(request_provider_data, WeaviateRequestProviderData) + + # Connect to Weaviate Cloud + return weaviate.connect_to_weaviate_cloud( + cluster_url = request_provider_data.weaviate_cluster_url, + auth_credentials = Auth.api_key(request_provider_data.weaviate_api_key), + ) + + async def initialize(self) -> None: + try: + self.client = self._get_client() + + # Create collection if it doesn't exist + if not self.client.collections.exists(self.config.collection): + self.client.collections.create( + name = self.config.collection, + vectorizer_config = wvc.config.Configure.Vectorizer.none(), + properties=[ + wvc.config.Property( + name="chunk_content", + data_type=wvc.config.DataType.TEXT, + ), + ] + ) + + except Exception as e: + import traceback + traceback.print_exc() + raise RuntimeError("Could not connect to Weaviate server") from e + + async def shutdown(self) -> None: + self.client = self._get_client() + + if self.client: + self.client.close() + + async def create_memory_bank( + self, + name: str, + config: MemoryBankConfig, + url: Optional[URL] = None, + ) -> MemoryBank: + bank_id = str(uuid.uuid4()) + bank = MemoryBank( + bank_id=bank_id, + name=name, + config=config, + url=url, + ) + self.client = self._get_client() + + # Store the bank as a new collection in Weaviate + self.client.collections.create( + name=bank_id + ) + + index = BankWithIndex( + bank=bank, + index=WeaviateIndex(cleint = self.client, collection = bank_id), + ) + self.cache[bank_id] = index + return bank + + async def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]: + bank_index = await self._get_and_cache_bank_index(bank_id) + if bank_index is None: + return None + return bank_index.bank + + async def _get_and_cache_bank_index(self, bank_id: str) -> Optional[BankWithIndex]: + + self.client = self._get_client() + + if bank_id in self.cache: + return self.cache[bank_id] + + collections = await self.client.collections.list_all().keys() + + for collection in collections: + if collection == bank_id: + bank = MemoryBank(**json.loads(collection.metadata["bank"])) + index = BankWithIndex( + bank=bank, + index=WeaviateIndex(self.client, collection), + ) + self.cache[bank_id] = index + return index + + return None + + async def insert_documents( + self, + bank_id: str, + documents: List[MemoryBankDocument], + ) -> None: + index = await self._get_and_cache_bank_index(bank_id) + if not index: + raise ValueError(f"Bank {bank_id} not found") + + await index.insert_documents(documents) + + async def query_documents( + self, + bank_id: str, + query: InterleavedTextMedia, + params: Optional[Dict[str, Any]] = None, + ) -> QueryDocumentsResponse: + index = await self._get_and_cache_bank_index(bank_id) + if not index: + raise ValueError(f"Bank {bank_id} not found") + + return await index.query_documents(query, params) \ No newline at end of file diff --git a/llama_stack/providers/registry/memory.py b/llama_stack/providers/registry/memory.py index 4687e262c..a5f302d4f 100644 --- a/llama_stack/providers/registry/memory.py +++ b/llama_stack/providers/registry/memory.py @@ -56,6 +56,15 @@ def available_providers() -> 
List[ProviderSpec]: config_class="llama_stack.providers.adapters.memory.pgvector.PGVectorConfig", ), ), + remote_provider_spec( + Api.memory, + AdapterSpec( + adapter_id="weaviate", + pip_packages=EMBEDDING_DEPS + ["weaviate-client"], + module="llama_stack.providers.adapters.memory.weaviate", + provider_data_validator="llama_stack.providers.adapters.memory.weaviate.WeaviateRequestProviderData", + ), + ), remote_provider_spec( api=Api.memory, adapter=AdapterSpec( From 42637644937b55ea752991a2c14a0a6a28b72722 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 7 Oct 2024 06:46:32 -0700 Subject: [PATCH 21/69] Fix adapter_id -> adapter_type for Weaviate --- llama_stack/providers/registry/memory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/providers/registry/memory.py b/llama_stack/providers/registry/memory.py index a5f302d4f..a3f0bdb6f 100644 --- a/llama_stack/providers/registry/memory.py +++ b/llama_stack/providers/registry/memory.py @@ -59,7 +59,7 @@ def available_providers() -> List[ProviderSpec]: remote_provider_spec( Api.memory, AdapterSpec( - adapter_id="weaviate", + adapter_type="weaviate", pip_packages=EMBEDDING_DEPS + ["weaviate-client"], module="llama_stack.providers.adapters.memory.weaviate", provider_data_validator="llama_stack.providers.adapters.memory.weaviate.WeaviateRequestProviderData", From a4e775c465af4c3893302a3617cca72cef0a3e49 Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Mon, 7 Oct 2024 11:40:04 -0400 Subject: [PATCH 22/69] download: improve help text (#204) --- llama_stack/cli/download.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/cli/download.py b/llama_stack/cli/download.py index 4d0966bb2..a1495cbf0 100644 --- a/llama_stack/cli/download.py +++ b/llama_stack/cli/download.py @@ -169,7 +169,7 @@ def run_download_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser): meta_url = args.meta_url if not meta_url: meta_url = input( - "Please provide the signed URL you received via email (e.g., https://llama3-1.llamameta.net/*?Policy...): " + "Please provide the signed URL you received via email after visiting https://www.llama.com/llama-downloads/ (e.g., https://llama3-1.llamameta.net/*?Policy...): " ) assert meta_url is not None and "llamameta.net" in meta_url _meta_download(model, meta_url, info) From 53d440e952059bdd62736ab6b65df33a1a0773f6 Mon Sep 17 00:00:00 2001 From: Mindaugas Date: Mon, 7 Oct 2024 18:55:06 +0300 Subject: [PATCH 23/69] Fix ValueError in case chunks are empty (#206) --- .../impls/meta_reference/agents/agent_instance.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llama_stack/providers/impls/meta_reference/agents/agent_instance.py b/llama_stack/providers/impls/meta_reference/agents/agent_instance.py index 9db6b79b5..661da10cc 100644 --- a/llama_stack/providers/impls/meta_reference/agents/agent_instance.py +++ b/llama_stack/providers/impls/meta_reference/agents/agent_instance.py @@ -673,7 +673,7 @@ class ChatAgent(ShieldRunnerMixin): async def _retrieve_context( self, session_id: str, messages: List[Message], attachments: List[Attachment] - ) -> Tuple[List[str], List[int]]: # (rag_context, bank_ids) + ) -> Tuple[Optional[List[str]], Optional[List[int]]]: # (rag_context, bank_ids) bank_ids = [] memory = self._memory_tool_definition() @@ -722,12 +722,13 @@ class ChatAgent(ShieldRunnerMixin): chunks = [c for r in results for c in r.chunks] scores = [s for r in results for s in r.scores] + if not chunks: + return None, bank_ids + # sort by 
score chunks, scores = zip( *sorted(zip(chunks, scores), key=lambda x: x[1], reverse=True) ) - if not chunks: - return None, bank_ids tokens = 0 picked = [] From 2366e188739bc5c65f72644541a80c0a99eb4f49 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Mon, 7 Oct 2024 10:21:26 -0700 Subject: [PATCH 24/69] refactor docs (#209) --- CONTRIBUTING.md | 4 ++-- README.md | 17 +++++++++++++++-- docs/cli_reference.md | 2 +- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 122080e9c..5948e7110 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,4 +1,4 @@ -# Contributing to Llama-Models +# Contributing to Llama-Stack We want to make contributing to this project as easy and transparent as possible. @@ -32,7 +32,7 @@ outlined on that page and do not file a public issue. * ... ## Tips -* If you are developing with a llama-models repository checked out and need your distribution to reflect changes from there, set `LLAMA_MODELS_DIR` to that dir when running any of the `llama` CLI commands. +* If you are developing with a llama-stack repository checked out and need your distribution to reflect changes from there, set `LLAMA_STACK_DIR` to that dir when running any of the `llama` CLI commands. ## License By contributing to Llama, you agree that your contributions will be licensed diff --git a/README.md b/README.md index a5172ce5c..050a71aff 100644 --- a/README.md +++ b/README.md @@ -81,11 +81,24 @@ cd llama-stack $CONDA_PREFIX/bin/pip install -e . ``` -## The Llama CLI +## Documentations -The `llama` CLI makes it easy to work with the Llama Stack set of tools, including installing and running Distributions, downloading models, studying model prompt formats, etc. Please see the [CLI reference](docs/cli_reference.md) for details. Please see the [Getting Started](docs/getting_started.md) guide for running a Llama Stack server. +The `llama` CLI makes it easy to work with the Llama Stack set of tools. Please find the following docs for details. + +* [CLI reference](docs/cli_reference.md) + * Guide using `llama` CLI to work with Llama models (download, study prompts), and building/starting a Llama Stack distribution. +* [Getting Started](docs/getting_started.md) + * Guide to build and run a Llama Stack server. +* [Contributing](CONTRIBUTING.md) ## Llama Stack Client SDK +| **Language** | **Client SDK** | **Package** | +| :----: | :----: | :----: | +| Python | [llama-stack-client-python](https://github.com/meta-llama/llama-stack-client-python) | [![PyPI version](https://img.shields.io/pypi/v/llama_stack_client.svg)](https://pypi.org/project/llama_stack_client/) +| Swift | [llama-stack-client-swift](https://github.com/meta-llama/llama-stack-client-swift) | +| Node | [llama-stack-client-node](https://github.com/meta-llama/llama-stack-client-node) | [![NPM version](https://img.shields.io/npm/v/llama-stack-client.svg)](https://npmjs.org/package/llama-stack-client) +| Kotlin | [llama-stack-client-kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) | + Check out our client SDKs for connecting to Llama Stack server in your preferred language, you can choose from [python](https://github.com/meta-llama/llama-stack-client-python), [node](https://github.com/meta-llama/llama-stack-client-node), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) programming languages to quickly build your applications. 
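
To make the client SDK table above concrete, here is a minimal sketch of driving a locally running stack from the Python client. It is illustrative only: the base URL and port, the model descriptor, and the exact keyword argument (`model` here; newer SDK releases use `model_id`) are assumptions, not something pinned down by this patch series.

```python
# Minimal sketch: assumes `pip install llama-stack-client` and a stack
# started with `llama stack run`, listening on localhost:5000.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:5000")

# The descriptor must match a model registered with the running stack
# (see `llama model list`); Llama3.1-8B-Instruct is only an example.
response = client.inference.chat_completion(
    model="Llama3.1-8B-Instruct",  # newer SDK versions name this `model_id`
    messages=[{"role": "user", "content": "Write a two-sentence poem about the moon."}],
)

# The assistant reply is carried on the ChatCompletionResponse's
# completion_message field.
print(response.completion_message.content)
```

Passing `stream=True` instead yields incremental `ChatCompletionResponseStreamChunk` events, mirroring the streaming path shown in the adapter diffs earlier in this series.
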
diff --git a/docs/cli_reference.md b/docs/cli_reference.md index 8e5feeb6b..0b5e73fb9 100644 --- a/docs/cli_reference.md +++ b/docs/cli_reference.md @@ -1,6 +1,6 @@ # Llama CLI Reference -The `llama` CLI tool helps you setup and use the Llama toolchain & agentic systems. It should be available on your path after installing the `llama-stack` package. +The `llama` CLI tool helps you setup and use the Llama Stack & agentic systems. It should be available on your path after installing the `llama-stack` package. ### Subcommands 1. `download`: `llama` cli tools supports downloading the model from Meta or Hugging Face. From 996efa9b425e7cc6a083b0e66f4e0131dd4c7c2c Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Mon, 7 Oct 2024 13:26:52 -0400 Subject: [PATCH 25/69] README.md: Add vLLM to providers table (#207) Signed-off-by: Russell Bryant --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 050a71aff..a8a5f5e69 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,7 @@ A Distribution is where APIs and Providers are assembled together to provide a c | Chroma | Single Node | | | :heavy_check_mark: | | | | PG Vector | Single Node | | | :heavy_check_mark: | | | | PyTorch ExecuTorch | On-device iOS | :heavy_check_mark: | :heavy_check_mark: | | | +| [vLLM](https://docs.vllm.ai/en/latest/) | Single Node | | :heavy_check_mark: | | | ### Distributions | **Distribution Provider** | **Docker** | **Inference** | **Memory** | **Safety** | **Telemetry** | From 16ba0fa06fd166d1c5a8d43fed9c5734a7c34f29 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Mon, 7 Oct 2024 11:24:27 -0700 Subject: [PATCH 26/69] Update README.md --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index a8a5f5e69..050a71aff 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,6 @@ A Distribution is where APIs and Providers are assembled together to provide a c | Chroma | Single Node | | | :heavy_check_mark: | | | | PG Vector | Single Node | | | :heavy_check_mark: | | | | PyTorch ExecuTorch | On-device iOS | :heavy_check_mark: | :heavy_check_mark: | | | -| [vLLM](https://docs.vllm.ai/en/latest/) | Single Node | | :heavy_check_mark: | | | ### Distributions | **Distribution Provider** | **Docker** | **Inference** | **Memory** | **Safety** | **Telemetry** | From e4ae09d090eeb793f65efdfdc0e647bf076018f3 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Mon, 7 Oct 2024 22:38:43 -0400 Subject: [PATCH 27/69] Add .idea to .gitignore (#216) Signed-off-by: Yuan Tang --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 2465d2d4e..d0a5f0056 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ xcuserdata/ Package.resolved *.pte *.ipynb_checkpoints* +.idea From 4d5f7459aab775464efb4c9adfddd90c5e600ae4 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Mon, 7 Oct 2024 19:42:39 -0700 Subject: [PATCH 28/69] [bugfix] Fix logprobs on meta-reference impl (#213) * fix log probs * add back LogProbsConfig * error handling * bugfix --- llama_stack/apis/inference/client.py | 26 +++++++++++++++---- .../meta_reference/inference/generation.py | 2 +- .../meta_reference/inference/inference.py | 15 ++++++++++- 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/llama_stack/apis/inference/client.py b/llama_stack/apis/inference/client.py index 5cfae633c..fffcf4692 100644 --- a/llama_stack/apis/inference/client.py +++ b/llama_stack/apis/inference/client.py @@ -6,7 +6,6 @@ import asyncio import json -import sys from typing import Any, AsyncGenerator, List, 
Optional import fire @@ -101,7 +100,9 @@ class InferenceClient(Inference): print(f"Error with parsing or validation: {e}") -async def run_main(host: str, port: int, stream: bool, model: Optional[str]): +async def run_main( + host: str, port: int, stream: bool, model: Optional[str], logprobs: bool +): client = InferenceClient(f"http://{host}:{port}") if not model: @@ -111,13 +112,27 @@ async def run_main(host: str, port: int, stream: bool, model: Optional[str]): content="hello world, write me a 2 sentence poem about the moon" ) cprint(f"User>{message.content}", "green") + + if logprobs: + logprobs_config = LogProbConfig( + top_k=1, + ) + else: + logprobs_config = None + iterator = client.chat_completion( model=model, messages=[message], stream=stream, + logprobs=logprobs_config, ) - async for log in EventLogger().log(iterator): - log.print() + + if logprobs: + async for chunk in iterator: + cprint(f"Response: {chunk}", "red") + else: + async for log in EventLogger().log(iterator): + log.print() async def run_mm_main( @@ -149,13 +164,14 @@ def main( port: int, stream: bool = True, mm: bool = False, + logprobs: bool = False, file: Optional[str] = None, model: Optional[str] = None, ): if mm: asyncio.run(run_mm_main(host, port, stream, file, model)) else: - asyncio.run(run_main(host, port, stream, model)) + asyncio.run(run_main(host, port, stream, model, logprobs)) if __name__ == "__main__": diff --git a/llama_stack/providers/impls/meta_reference/inference/generation.py b/llama_stack/providers/impls/meta_reference/inference/generation.py index 4351a3d56..27e086e0f 100644 --- a/llama_stack/providers/impls/meta_reference/inference/generation.py +++ b/llama_stack/providers/impls/meta_reference/inference/generation.py @@ -297,7 +297,7 @@ class Llama: token=next_token[0].item(), text=self.tokenizer.decode(next_token.tolist()), logprobs=( - token_logprobs[:, prev_pos + 1 : cur_pos + 1][0].tolist() + token_logprobs[:, cur_pos : cur_pos + 1][0].tolist() if logprobs else None ), diff --git a/llama_stack/providers/impls/meta_reference/inference/inference.py b/llama_stack/providers/impls/meta_reference/inference/inference.py index e89d8ec4c..dca4ea6fb 100644 --- a/llama_stack/providers/impls/meta_reference/inference/inference.py +++ b/llama_stack/providers/impls/meta_reference/inference/inference.py @@ -132,7 +132,20 @@ class MetaReferenceInferenceImpl(Inference, RoutableProvider): if not request.stream: if request.logprobs: - logprobs.append(token_result.logprob) + assert ( + len(token_result.logprobs) == 1 + ), "Expected logprob to contain 1 result for the current token" + assert ( + request.logprobs.top_k == 1 + ), "Only top_k=1 is supported for LogProbConfig" + + logprobs.append( + TokenLogProbs( + logprobs_by_token={ + token_result.text: token_result.logprobs[0] + } + ) + ) continue From 48d0d2001eaa7b2cb82bef603a2921736c04b657 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Tue, 8 Oct 2024 09:55:16 -0400 Subject: [PATCH 29/69] Add classifiers in setup.py (#217) * Add classifiers in setup.py * Update setup.py * Update setup.py --- setup.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ae1f58015..4db636872 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,16 @@ setup( long_description_content_type="text/markdown", url="https://github.com/meta-llama/llama-stack", packages=find_packages(), - classifiers=[], + classifiers=[ + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Operating System :: OS Independent", + 
"Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Scientific/Engineering :: Information Analysis", + ], python_requires=">=3.10", install_requires=read_requirements(), include_package_data=True, From 2d4f7d8acfc961b18add259c1f6ef2aef7831a04 Mon Sep 17 00:00:00 2001 From: Dalton Flanagan <6599399+dltn@users.noreply.github.com> Date: Tue, 8 Oct 2024 13:30:40 -0400 Subject: [PATCH 30/69] Create SECURITY.md --- SECURITY.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 000000000..fc58b67d3 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,5 @@ +# Security Policy + +## Reporting a Vulnerability + +Please report vulnerabilities to our bug bounty program at https://bugbounty.meta.com/ From ce70d21f6568f8ba9d84462b14773e44bd715dac Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Tue, 8 Oct 2024 15:29:19 -0700 Subject: [PATCH 31/69] Add files via upload --- docs/resources/prompt-format.png | Bin 0 -> 173947 bytes 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 docs/resources/prompt-format.png diff --git a/docs/resources/prompt-format.png b/docs/resources/prompt-format.png new file mode 100644 index 0000000000000000000000000000000000000000..afcd076226c79466a484d67d0f5efcfda57da041 GIT binary patch literal 173947 zcmeFZcUV(f6E_Mdq9|g8BM68J2ndlbC4itJO{&sCL`vwrgc4B!rArl%PUuB?FE)@G zdI`PP&_d`5-->$P@||<;bN{{1eR-acu(S7GYtPJ@HS?R_@J{)$3>76EB?$=$mF%PY zsw5=j2_z)PNlu;sM+`RQubt_dm^N1Zx|)vv_7H7PqMs`R4%*`!1;+r{l2nR=o9}}IjS$X-jWYrPr7tj zFZ97ZJ=)VL&)po`-hU9XZ$$%MmXWaWQEqlR8vo>SGwn)9w$+H zUi{z`NodJ=QD{K;%Wuq;%$6Jzo8%{NtTpfm2m}S(38P)OAujdCbc%0^aDi>ohG+UF z&(O;8Q>Z-Sr|Z^-XACLbF-MgQM>?u%->|}t=UG_%YhH!E_gxv1w2uvWLvH6qz_!hI z?4#^c200xQgsgEyc^BSH8DpyN>%J})(EaLW<eH8 z)C+5q=WnB5U{-vk}2U*kW<$*G$ zr|z=KFU+OSMUk|P3{~FPaDKMUEyaKM%1WM=#2IxyQ~T4N6G`Xw6O#QlpVOD$T+N^) zL1ToUtMq@gniC;CvnhY=Sf=EuOj52S37r^$PZS-GzK=C9W!=4yOLjH%q^kGH+!M>6 zX+kL|8@NAFN4&DPJ+A)x;>0Dh6Y~u$wp1fzdP%fm$EBZLsv)^J0sY{6?Igu}X3DhV zS<)h*=U<%VxhC_2VbkYT(xY26axagE{-I56l&$Z>;LHlJ(P&mEE_ z+$W8=Re zjC-HP_dj9zjg^~-B)>8bsE-n=O!TXTh=uP3*Pj}9LINoNiz?lAr!=;|L zmHvz01D}r>6>8EwrM7sD`||L4_^0JhK7-POCoJj9=tW<}f19+GyL;o~u@0uDM&qeX zTMFAIJ6bzvg~|v;%_V&Xo8X-WnMv~9s~$XcGzil1*9hN4)*lxfPdXn*kVSur`V_AG zz>H;tIq*XIJNx_4lw&+^KHMLXy3O2wvETy5I~!zL8b8t!Nrv22jYG<%J(kmpriFUGzrkPVhCZq7lX zNpj4xiPGDXDhIQDXj0uyA;l(^2 z!ozGV6Dlm9%QPRWGHhJN_z9uHapI3%3^_Y(@O6wWJkZwbT26}II*E%h-tU@ zaA#M7vh(yK>Rpy|^naYwq}k>FaofyOS>_JQUDgW0UC+1jKRQDFlJ}FRI=An{d|LPv z`zgz|d_847`7QE1{dMaH95S0XbW&_p3{_-OSv4vO2FsuGqd7z%RW``676DG1Tvh$Z zq9u<&g)^daj<;C_#mWS?`GynqniB$zzB2^IO;p%#JLucSSGreDi&5yGU(>3Q_E0(0 zd?o!Q;myYVjBiulqCGUc*pHmTG*sQX)r%0Dg)=tnlk8MkyS;xiYDY@wb8L9 z*2(YhDBX>)j;@1vO3mC5i6u48Z8K|5yWZYqRamzxx?vk-^i-?gug@??so=fN4NV94 zyh$|1BGK6W#Zk%G+Th}R&)UX)u4O-=J3{J~Zv-2yEXUJI*%XKUzKH)9Ig^o40CaMudCwR^&?@{J$Ac$XKu~29AYva z^t^Ry6aNiAg_mZjW8tDGp z3^hbXx6XxSU2gtKV$MK;n{M^U=amZA(aHTvdS{`Iu@@tjb;h1Ki)DS7MuR*rtCf;f z%PJ#>3}kCrPN}=rjt^r-g1pPi7-#~L(N&cS(_8}0%x?S+m;so4=m#_z^J9=XE3&dk}G0vKzBs9Ls|( zbeLwG)2@GLK{2~F3)5l-gshkP;9Nk|aZa`6w^g3XD5inSsM?$G`SDf*s zP!q{D88XRx`;=4ul?m{tNtH*W#m5O3QMl3K=TwrO^dyA1_p6OpUW(jzI?u{X_tN$) z-%GCBH$}#eI<}Yio@-N+9JHT-iZC1_use7j8Y~g!Pu^5H5+s~nzCiBbwYK{a1h4+a znzAN}iX+?g>TO`MR9w#Fq@q>ZL_t!a(!8`FM6ugMn z{CYnY=1W2W{<;8Ou1REno=u*RbnMU1#2b*1+*6a5l?Cr=M)t^n zA89#|kkGIYU!=0CH`l@a2Vv@(j+%-JB1Se=+=ee~o*Q$!TGzazn*Q9OijjsuE-{HPo+S{W@`=fBpQLr?D&S-y>N&{C+I(fDqz05FYM3 
[... remainder of GIT binary patch for docs/resources/prompt-format.png (173,947 bytes) omitted ...]
zw09heAE$y7#*&Ie&}KX-L@&2=?QBT->v*{pV*`W6z|qzur65qq#8bO5njy!ZcHfs> z&fFmNBqwhoIbP;iuS6R2WDjYF>#69vdcP`=_; zE2c7g{@Ue;dHajY3Ew2aX-8aZ5*9s|1X;mE0w&G4>czgbGEKXN#6hNYA9J$3Z23$G zL{4T_j8h(EQ^3v9?8USDuqHD=7?aEV>Id=ry##)T$D!1aaT(&4=0?o@zNi>ozuKvR zTnNL5(G1n zexgoh8tCR%%8FB@iV4?PWmXb>n5et!rxE1SHyLKK+5L z|9TmS^!5AGnWhOCnpQ93fUHBS!RYnS>fTLUTk@Z{wocYVdtEZXae(Tbk0_swW`saKulKFK=ddJOjM%sUqjWnX z^WiN}2mXj8r>4j*`v~POy8DR+W3mQMC+zGcQPzcP@{fM`cBzPZx7lwqzsZTjWs5B= zs5P44)2_$4&-_)UP%l56z3OKo$W}N^PB@C~&Xs$M2Sh!Z1Ed(EA~)pBMpyI?omg5A zwhY`DH^gp$a&eDhm&Wae?GQWR&p1^CsW64<+cl1UM zv$>lkV-Iia5+hCfrX*dQ16m(3ZlOi$IuD88e_4*04T!x>Lx4xK5I0XS zntddY#=&`az>7>j zEDuB_x%72?j9z6^8JncKIUN4-!Q4;L=v3P^SuxU@I18Krt#o|F_;wMQj-sqZ!BAIu zm%b|LY&ZRrOm#>DfBD6Jw>;mk4C>jgWDAsHNClTCJZM}mtE5WF+Nm#92E|X#2FnRh z>$*1%6!$yYNy(PH>3`zl zehWHwod;XHP+3p?)UNw|JHSp}09C}r{NYA92ZhyA3vV_@Mdx?}FlLJHI1-Nz3ajfcsip#PM3pZhzFkg3P##uo2q(BRhO0e%P8D zlrXvm*R3PPnjWgo*c+d-kpJhb`w?ZOdoIa*<(_1oKMVi8Hl&iFKyK%?KXF`jYybjQ zGo*ba8u8UC*W1QB4rfAw#H9jGOO44*zGfq56PNeEga{8zhw9?so z&A#M4%f00~PC~FhiGhG&?6!WZvERZwC-1_hkI2YfMg2NSltKY}h&xz?vC1hMQ!r_? zzoht~b2a%1NJml~{GGpEv^d)Awsy(_bSYUfh&Wc!M?&I{r||>@H&eU!VTw&PSYj<+ zQJ_DRcl)x^1Yf)7`wv#sH+W&qb^t*8gJJ~J=cS24=RrO(M;`6D)-QWlv(YaLg6rKIm~Q1l^Sbv%~NbObMT6oeU~4GpK|qp=^y zEF5p}PU;m)on5TDrg~^8pUG#Jc+PRahAi>wq(9szBQ3fBf`#;VN7x~f`KQUuZ z+$CRY6JwN7Gh0pbXqDf#N#xy%dW+FsS`g=0R$uC@fCE{|a9qE|z?I&@n0#TU*N?Zu zV%IWf-45L864qEEM`Tc5`((U$ehcDIYN2SFj>3#Ri`-@XGdIfmnxpNE%7@8pK$f$$ z|IPOp>+vLf3)QX=tA9Kkvv$Gb3P`S6x?FQei4aK5ZnR-J##SJr7m6pJR&5r>3iT$7 znA%BRhFE!YKzC}N>YauXXh#e~uRT|lkKTl2T6mj|fBGKH1rfGOjcK0o-~{G4I=3or z{`Dpt`)i_pF-h55KmP0_P)#Z8f%GoE75L!d=p4DEcXcy%NQ{KyH|`*F@9zAHcf0J!5Lec0N?_}LMA(tCk-r$pXNV${ntS3K9z2;LM zOt3tWQ{w9z8Gp2WVdk_dDsTlO46<9&$@Y&L1D2cH>*v$FoR2S+y4E3!D+Zd8elfQq= z%p(;QymU;=Q6Hbo>O?G)5D)tYtgpEVxV9`NYNg~GCA3D(2yg#7-k7By_WS1s-V-q~ zfs0oq+Xt_j-dyD08+&Mqv7&+uqvR8>7`G!7IBt&77i-$z_|X#pX0w{WfOd>M{{mPC zfMTwIlv>?Vsj1e|XY!zfR--?bdUhKdA_3~d>3 zBdvN@+UR2r`Cgrzv98$o&>XRiem%#pncR8!%mp1T^Il{@O-|xc)$I&qa(9K zcQotFpgIt2qnCSmhu3-pZNU5&Q>Z=JkbuJJU|$W`A{ewS0bT6)JS5EO-zS@O^$9o&E#dzXcKB9Y;=q_S; z`ph(MevLuwVWMMVnhxS?+a06p;S{e8v`GZv@%#_`lGx$6Jc{FQK`TFI1&G3L!?o+q zM!#$&3QhB6TmwCpKr{K6F6V&^<-iC!8H(iy8yvlzP-0vX!3ygsj6hlgDzMiXXH&@e z;}?b0l5*>*XDd*ua+_%a&}rtC`!BidC=964=a$HR{~Jo;V($N7KAFY#8&3ir*fMElZbA(=|evoylE<^M&W#E|eoR&vkP@y9kGq3f8@s z70B?{B3x*XAaM*BKsP^3FEg7WA^*{BHc!51UHoUDLO{6bgpb;6oA}utV4}yw$mD#- zz9H_XBQKP3PIYEWN#xWtR@4re<^hy%IBYLqse<-_$U8AC$e^n9?VhhaKHpE8IXBV? zPg@fdPd~>Bp4GUOTYoTV3-0u}ske`S(6%4QBe}o0Y+@7m=Z=Me8hEl)$dbeNv2qI* zLyENV9lhQKK?0S#sOmDSWargXwbt8}h6*pE3w35G!|_q|MWi1aFW81vj|YrbV{SKH zh`&g)Z4g(9Ef36Ej9gi;YLaf$=&3E^iL$bDuSenCF7Q<58?uHe@iixiK5o2x`<)8B zf8YtbK>|#YpzxBsW9Q;II2f_SB=F_HAwWlQtm1Ta|L0~fXCJs$KBaADn|{&&2KO6R zR8vjAobE5Tq=IGiPnIihf8bagklt21%?L0zMasf4WeXHB_JP35d*-Ozt|w;Q(f$-X zJyBOTEueheijE%?A}0JBQ*}a!LidrKyzi<-7pM9KT$9Q*D=|LwIHU45f`^_Ni(DR6 z(UUy565H`VU0hRN*;_)erUzc>@Q z=|7U0MB+~#e9h-oIQ`st)SGXmy)ybE^T}*ETLq>q{byN=kA2i$-r=?(jEhEz zH%I5?W@IOrQKR{nik~KKTf^AWe8?Tjs-_h%pzHXkO1LN3EH{(qeTiFteqHYiQbo(v-H%{N~ecdRuH) zFsymcV!UD}*r;c{`jLSrSUu6#3+9GbZ{B9(jvArCEDo17?vtf&g(S@! 
zn`SZk(f0!QfIIR9uoH)JC51|jE9<+yT6)l*BcC%Wp_}b4toLzi{oogwsi2zLb}3x# z7I?QqOYc~(-}{!$Pv>5UIah`?g4KeLFHe5hjPnA`h_+o1N@Wy{aN3q6wMPs7a7&!{ z@^>W)!GotvuSzfuw!8=voaZ`s068Tdj&y4esXQ;JNwKct-0>eddea`dJU?ro+)(e8 zvSKWCE36>o0qdR!Z+#(r3f{YSwmwT*tgm{ycYkIk#dl{kGwJSRgYZtW6y(&ouSP?F z*|1~B`ZY3N&pg=MFKClz66`iPGSa10hP_3wU-?Hmimi?})3zN}nBpxZ;kzn0bSxN!m2 z1W3p@*h=QV_8AT&xN}+C;x-{`7HiPb%bs!Jl!(3ZXIHe!#$<|RfA&&88};tdv#dm8 z0o7Ibvu;h1=Y;C!1Ih!H9i~%aBhtbIVo(^eYhNSM)i?*mWH*k8?_bx`KiBe!3t3&8 z3S+y>u(;g8%Os``_XGO_aeyxGLt^5BN?3+&MFA(?@+JSZwtjWT;wu{%qSV{_9Z)a| z&ODf3%NQ%O?5yJ)&e#2Bgt2?~Bi}B8@z)oD(yQ1so*c6dk?o2`GmW?Q>`VQy=aza? zBzm>UlmJ5Lc~RUk3&7+j_`w!0pB>Hd*#sRJ3y-0|gnX=pJ<1Yr6(Etuzr z>$rTAut>hRTA9?d--$xY&mBpS?|Rt`>!BJs8c&tap1)~7p(s#v=!`6W`BQ?cNsI|6of$H9ZHFw2d_4Eum@D*crWwwuUZ`~woiboZ2}YFxQgGFs0}?Y54C#_ z#@0Eio!J&uX#}-F{#aIA8NhT4L`YjkzuK)9yWhx4+!vfZ}W}ttjJAU-3G_D6&8)wUp z_-WvF?%-3lJvcEIbY9!nc{x`&Y#Vu^aowZIKKc`P_0;*;`{V7XMX4u4wM6sdKSNlM z27)eI*`Gk6(&kdD?(o5lR(JCf?4~Q-7*l&IT!guY!17VjA#unZkQ?6*hqP5ft$;?5 z`Qp30vz65Fnd__V>sGl*uK7T#NBzd+$ifHGUsEIkTH^v)E=8^{oU&e*F<;2%T}T85 zL@muFXR+6xaMEnwOe3k%$#i+7?SqO32KVjRwG?$_f%`kZ{f@be^vJ!8CvnZe`FXxda-I#U>O%{P|;8}lq5-~_|BowTCjFE#t=cYc#US(3en4QtN*9* z|Ms&hYk1dC)mRtLl1;9g%gSCz3VJmqIi*Z_t7NDc14?Cpi&6C!TTRCPkpFgetlDnHz_zSXrD9qWC= zbA;eqSO&4nigN@PbH_nW7b8lYq@f%HUabagT;ogGY=?AJ+?#--=p0IKD6`q--RI9! z|2zu^pS8VNs&vU1zf4nA@R~I!9`+vAtk4f1ecrkve6Dg%8VRS%D2spqu{H(D%s)f} z>jyGqs0@S`t^1wV6}DlWB<(ny503-e!2pT8#{6C$53~@%5+@JZ3D^x4Q?9z6PX&~C z&3`A58>SFN&%!6icpRJk6DJehCNV)bBy9h*M4sJ0GYRo}%KM_Tqc=zM*GkB(cVyLT zIk^j5dToKZry79)pA!=1XT$jR3U*py1s+VR$OpB?m+re&p0XCu?Xk@xKO5;K-1rCr zXJ0IQr4b5A8+(x55ak+eec9c?CJX^ta(S)yN=m&W3xWGeFz(_Tcb>Kz%dM{6Ra`Ay z^v&1vSRJa%3}SEhzG=<-j#&CmfH9wkr8WavQ|{V#M*CNp%`-fctHBxfnd_o;1t`)* zN%2{@+aQN(Gq~aNk)g9%OqKqa1t#qk-7AAbpGqyQR@9Z7z?P;3r@3=ba>l@K?dC5W zXA{9>R!h61GBvfFaqNaeeVNJ;T(>h90{=ol!_8E0;*`~k`e6WjL=(8uibir8Y7UwQ zLp4**8ug!MUnyFFj=Ua;zUr=W{l3nW^I*t{zS1o)yweZPDbSW_A0E;6t@i5 z(+pA9Z+dz9*pWH!rpJy4TmjS<=jsn7f+N>%J(xL`l<{Tde5_@8jj zzoxi3RC;5I<(@lS&~VCgJUvebuUBRl-TVRc>(d*uv@GWcVued9? 
z;`qk8LRgXU8(kz}Yaa;}4nhM<`v@jttyz@evLM0P!r| z-qnWpGvIP)ZcNoW3p!6oxVP@ia%Q_7@?as|*RprICaU%X>WeL>E@-6`@q6qJakmut zZs<}$JsSbn4V~2vT5Y{Ka&mWt9p|RD&w0>3K>4)tdfG>DDj>-opfI;qrXR(w@?nUK80K|V(0c#4ue_+1B@{%I(q z7F5t(JBa33?KB%tVi%)|W?)ZYQLtebrR&|`;exg_TzH!P9EVQdY=B0gOLbl?j^Q#t zrlzBd+lv#{zjDIVy#*9Y-E5b}#uGPw~en~mt% z9E9LrezAN~Zz z%KrRi90G;Yn&PByfa?58)c2Ki(!x(j6&u&l6)BBYz-h+1+6f}CT%w$!z|oaxIJ;^#mMxY7T+BE1va znc%B>L6pLQi;lZ?AX_jv6))8h)`aI8Hz(ssXO`Qx{4cAUB8$ytEiP*o)j;l;mdiOd zzw1KzWujFy@muA-W-!S_pft5>E*)S&O?XN5q>$cqKX!J1sQ;a;RL>EpMPj4 zwA!a9S*-$-E`BiA25w2ifS{~fc+8%*JTQ6v2umZnn}1Zpz z;P~Xst(ukEkg`7^yXdxVK_2f6AG^MJ2wD8=n|#}X=QFC#)5~lJ7*I4Fceu%+b-p^p z_ad=me63B4a@HF=XneI>k?AY;+by8nMZX~wHv4YmhLZGT2|5<*(JEF)2vS#9i$m`y zj}=XxO9$?>Dh@r*A10z5tk5UxxDT-ts6M_ijPLTwDo_%^=Y*6Zl|NnXO=dS+I4+OVAj;7`#&J3r4kk`H_W(s^r16RsGO|`l*oQo!$N}w8o_E zE~q4aztCuKO%x=1a;%4V?v{(YeZPia z2_7IE6cz-Z)ve3OsOV&z$Q-P!K{%n<`FGVtwKE{C8UdR_8V!{@;U`9%ghlzo*PvDj z+u8byg|=zwpts78Kd3Uq?>}V(N}-RR{Sr>O!r5(Gl5Z~BUjEm;%*kZh#(IfT0}-%Z z3ug0bm)-Vxcn&0D#)BGk$Lqdz$$#C~h&dxFVYuW@cc_X#sL=Uj<598bYYEd@4;uI9 z0b*~B61|rRHgE)dH6U({9s++POe=LQV>TKZ2@s3td~ND6%81>pPZ?)z>4<}}=g8$j z>5Z=UTNnFQ%}(lXYVe-jn-97IsX@yw<3Fy2@uMdC&3ZnJ!EL8^o-eRaz@?Zvw|Pdy zWmC~4`=5_~ZjopR2)a}P;e<0>jo*L%{{0z_;@?ueF9tM{P;q`F8NMkAv*@Dy>guQtX*&-qg6TK2o=I2t-=!>L0_O<`Ju&5f0@SjX)h%4&Um zAz1MF6A5l3-jgM_;R5}gOXPNXeOr+hUukp51t;cWXhN8$+UvudRJWm%h0MKu&AGskV zr>6cTB@cd{S>_n(odBOV9y}YOnPAkf&E0M2txv=J>CH8eqt-(Zgaii7Bf?h(cgrdi z1`us&!bY!D=X@4N|2l%-wJjPwpOq*(_BFf)ofibvHz&=mlEcy7%U@9(i9`ip(&Lm}^?aeaA@2EZ`nzq%v4n)eY3%VBJLCc&Ak(;U<{ zZon~ndLqU!0O*#p&VDv`2_b^6P(p$~hdAmXqkxcV8ybzf52bZ3!L(Lg@f<$+0uMAY zW#LkP8-0ss&Be4xOM4|JaDkqZ)O~-irUO;4TVa6@P+%22!H{mIP6C?eASU5-5j?oQ zSe~tXl<^OI_|}fMYdqCgrAEnmTR+rve5MvNc5Y6w(8JCr6PK;}0Hcj7g=D{IRwCAs zT7%GzA3S(LuZAcM<9_fGWO(lU(OwpLCeXbdVBYG3M^X8O8vpl`3IwD4y3sNu(<+9| z{)X0+nbI64O*(+u1}&JwcmckmUbnWQ&m(f0?X^O7!h~;dlRm98on-#$ZbJb9VtP;G z73KI|_wf)~?`9?xyI~^7-{4`bSIXh#bew`pbE~>`gNhKLpr^&&f~-m5uMusQf;T43 zScMt|O4l>ZqTk@}gko%mTG- zfaGI5$W%3?`@&$^^gmC8g zCynWdulfHteh}=)dmQ&?bz=j^Z{9Iejp+ew7`tjPN>d z1_F>zT0HLhwvUxr4bA0UzN3<%6R7SnF{-vXKZ3%ulV%hX1@B9T-1lKn%9OC1t`|=B zkv;|s6C2YyBlk(g@Yg$NuW_}xL-U=RUXt4O&+M6Q;q*4OMM|Fk!BB?imEzp}_)eVh zt#M1BbEnITC{qqaPeLkwG|0!JC6|x~Y{cj3NoZ?R5v$+TWk6&)lONvWrQ@h|4-W;) zQcIkGXt&7Vh11q@%UOr)>TQXo3C7Zg%pdQGhfODop_2()e#P6P=(k=_|NZSk&^=}@ zXIPPvFIXb;6Pmh2tp&W?(WYC}W!p8+ju|~9I4C~}AN9P&iR^5^RGd;8UJm7pST8y# zz{!u>%5*uzRUGjdvnXk?QP|mO2U!Q(#g8~_n8i?C$52mcB5C~{o3YZTGje-ijdbI$ zWpg=wZ?G?`xo-~dwhPL<;bI_v*=AuNTq%(Q7{>1%>&NDTF1sNu`x>Ge7cI^UW9fC7 z5(O?xgjZ#Ugf7{%i|_iC)hS|fWxdz?ry~1eLkV+cUkSepTe!8qIaP<9H(ez1iU+A_BZW8<%}19Onpl?!Ws z)lc0&`gc5!UYWPxvj6&mMmNJ+*kEwBicPrHhz{+AYeXiQv(7b|mfvKd~vA;2w+wLS^Tk_Qhz^`xJEST^U$N+`9hpTiar zdpJ-|$oE^(Sg!N%elM-f`vui6m06HLq{zF;p5l0m&$#oI7zZ>Ln}vvK%4`1V#8EIi z;V%7S!3K!?f3caUT-2|{|Beq`+x8as&`1y4LJXmHv5IhB@a!92ahZz)1@?K=Wi_DyXPCDP{=WyVx7TABeg!12%m!_JPaN0>q;huKtb0SX$6{dq4lq z88I()+PK#t3E~3OTNpYe3wTwy$ciwb_rH(^~|9XgD z5@~PW>xypjZgWI%{a#xLYTqA(e#XH1>mQT@_11f6Z?kN|XF#1X_#jm2CNwOpzVQ&s z(r-9LFhHFS=n#+zntuGn`;JIMC9bmzS1y&(7EtgD^j%lsW611T0X+EgCiNJZn^1ru zV3}hr@LXWNzK6e{G3F1CWdcj*{`p!jQKWDx(5@GWaeTzOmb}xcsPVKFuJJfSOB1R1^+6 zCn;`8BAkYLiTsEOohk@57VLPRZcKrKYRQ-7g`Pu`r!`eyo^K{A8?Q5bAhuW5LGjdx zW!T`g%eH0a4t8qzu<6&dZR-nHYQK#cw~ZW~2@xMOD_yQXt0I!#A?USP9FYG} zFm;%rPB`3J=qMeApb)t{P$$PHxlbBDG^LOJHQp3Shqe4 zT~4R<_UY01DAT)h50Qax8J35hsuHKV4j;hD&#x!Qt87AVeko%ZnBs>2@&_mM&6V+N zcCMiYDEd9IWULRSqptoB@>*8MnDfnuh;&%8VPp^GN^?)rDaZcMjIljEXqJ07N*OdR z=Q|yU@MozAHK411_6J=R4nxGhmk;MBxpG12A{ZPq^q(qAA~W65a4LtQtj3RQS%!^& 
z(caG4ptRN%?fO-kl;O;#-OQ_b<85qC?*WeYY)(7;j=--Pm%fyQf41;kd}fZqC>9j=kw-eRwRzf=_nQ76-Hp^#BShz*0X+;E{G zEs~r|x`3v!YhH{R&Aa1Rk>Le6bihg+b>!Ad)UM(!K|TNXN%y}$CrupQ(aTn&cPN_i zWYR48>iJ2unDdeC*R~?N`csN{?z#tc{j8+tcN1?=L<02Q5v^L-F8?IilCT$Xi$>si z5WRmLsWIuo9t7w@=21bCS^;i;CO%ux9Fx zFVS;1c(!VI(ha>niw;s+2oStX1`k=vHtS*GWg~QhT0~-isrSFLf0;I|<8}D#! zSb7J}UaY;sY+Nb>0f&sEO-lnvd)EIHeCSUP|3XH8US(*_TISgnY^tgzLYHVIn5-1% z@a2w1MkgBN#}82V4PN#8QwC)oUb1{VY`mgRLc!H08YNAA2~@V9=4%rM)Q!b;J?{f$ zv>%Ds_rfx?FHG(=P)TNfvK#cGa1c?gQA+g7OZ~H7z8CkRv?bki0DQ(V5x^pcBk zOFRN(Cc{UmnXr73R6NCV3%O-So>e=W@tRF2xD)dAJPLZokL>gvVYKWiMXV@H896IJ zg)QDVoioyJ4kh`u_o87uP3nr~bD4tipSoMJlK2SE;=97Hvf3A4S;3@Liq&a(98G;3 zNbxh20U&!k?Uuy7<&KYvB~Y%}08FOOuG@6|^g<57#G40`YGyp8ct0Nb&|{m(d`%!+M=Jxw(3 zjANs{c=re2DT=FXlh3waGH>rftKwZgY_LiRjBZJXl06zw$>cNdp(5kv&*=EGyp?(l zhPj(s=76J@alduA)mAl*f7UIH!cl$)Z5Ddf;pOp)>$!+mi{rS0K9A(ZjoN=K(UHS&o=jhX+ ziLrab+e1qx^$dkBQ?IQ$h*p7K9H&9a9pmyq)5R>*-$Kc{4Lh?90-^4;855`&8>9!+ z&PmQV|1mJ}0CNBX0J?Q`)sjXm&?(nJ?}Fj1E!vxHA8nvo-?5Y4F$FJUpN+ez1pr!COa}rLg)x^lrfN`J2V{?k3-LU3;4P@roUFjFe`64YF*5|8eYNfs| z=cEE9M*qFa?Yk)oNZwd(|m-K z1pQYMDkuzXXV6&RT#m6jj;&-h+^y~;q4Jt8{HKqGuZy6Z2BXJ!P`^zF;WUl$P9Qkt3?V4sn6=9!Lvp6{jdGM%qH>1I*1R)Bfb^qTALKzf-k# z2#`~bMd)6dbc@EoU&J1={et`VDSw+YXED!MRPjV+5);qrfj3%lH7-_E@|7s1f?oef z`El0iJs0@L?0kcgs2HE#>(Qb{r~>dt~4#swZqRrS{yJ#!kao4~194-EOb((JPp z>JE(PC!u?%yt-JuIW%>dh7viP?@uGV9ZCad`xlaX@%VG3`YY>KPQ(*ao38xFz;u|! z#`if)PYG`=8D7ofsJGrp!~KwjkIFObZ+$u^UVoJEPz*ncyTh$1A z8|`Zid)wmlmj1K6IR@DW@o==wVyPE1#@=9uI6L8+bdLHZLyTDO(-=Mla=LrGw^b>T zuYlZ#SPzQu!v%x6HSB1fIPm#xf7}lj88LSS&qB@L3h1ohqpaGV9t&^Rvi#7S>Hqs# zFty(4eSt=(aS3LiZxOn+NF0sa#{*Z!^i@`%r@M!B+xg{Cdl+TP_+~|5gjqjSr{Rfz zAZ>u(3q&LrCv+dm^90L@H}u6V$%hTA>9_9~-|h@GzfBb}P5QdUY2l^Qhri6^U+(e$ z2KWB{-~T5}px?pC?tJpclbgmUz*rxBS5t>@0f~wCkEM+MrSv}FfIV}B!tTzfeb4`5 z`kpp|32egic*E0suEH%{c00*pD)pXlojP}(TF|F5HtRm+xNRX}1aQNj8o@+EidtK6k?Z8^SOF(36>b7^jFP(_iVlqiHXOpb%t@V1H4N*Kdw)~lz0U5KS2m-y zs0B@H>lNM$-v(3I?CCR^3V}4C)zoJ07o#Z>9+lfu=L;b!nvX?JX8sp@Zygu)w)Fuk z2q=x9gaRT;DBUrXq*78M-3;9gg0wUUNP~0?|;u< zevW5m{LSpO*IsMwZy*{BmgW)ZF2iiDOb+uEgspMYA4%g+Fh!0CCkgk|NoX}MGr{q; z^>>%B;8@bzqyvl}HOlNTBIrKIzu5r84_7cC_BMy$HXRT`<8K3OZOH0isiX>Lw3MDh zsU$u-1doCXIxen zb9wn||j@gFc@EB5z91|J%YXAJXIhE+imnqTN|^}Kp2ahB+fZJmhSRz)cE4Q2>p=~oi>J5ZoDSp%{~GN_+q z(akokROY_eYjwVkI6@0IOQ>dY&%{)V32Zv23171w|MN&0cXdnVD^?s=1ps~3SFU;W zWVuW6qo*%sBQ(hsb+W~^L;%GZt|0N^vE1#M@xljRXw(oA%xekhs=nQR>~?9n3zR|{ z16QFGYQm$nADj&=#?#-HY~0%geC9^~2j=xomZ4%}&XC+d{iY&+k#6%YUof5$2w0(= z2a_jkl!nLmG+d7&L%r=a_amqs(SD$>kEfzU6uc#wQ9XM&WzlqGu~K)9MCx|Qh)#Tt z7eS+h;B-=pBE_|*dwO{woh%CDUQD0*FgpFVRF&}bl}kRSPV2#*MM0*>53g(Ds+SZ=Zht1~6><*eQ)T{zfAe!3FR*~ni|X?Yl%?1I0!>a& z{{~HZUg00>EU8Z7ET^jQ`I}p_4p+TLp4a9oNU@3by_ZNpnqYW?FOH-M+8Z#Y|6=HD zKMVy3FlEWTXV*aPr*rY;Z#+G}9a(6J&nj@c@pJEVbe}r^gIL=qNH)+6L=FHJQG)4j zBKMaV0pCbNa7moL_U_Gn5ut)6g?3w`(Ju{J#B^C-8bE1k))Fb048G;bufKnt6nmTK>kjV4CxGvf#rSef5fleM zHuuvSe1Wy8M{Ohjb{QNW%YzBzDNGYDscvDYt5w>is#HvnPJRsDJh75-T=HnX>}K#4CPx3KuCv~=lVrkNDgRq-_mPIXp|T!T<(2sL%A*0 zT1_`R?@{X1_%6TZFul^=bSJ}w-${%ei8!T9u=?XGUwSgQOxJqZS+dK!>OJHs0R5vb z`g$4=MHA+Ph#S2>oj!e^j&0E+BJ#jbliM}h!6e&8S~|OQ+Wd{jarO`ZoG|# zr9wsc&`+2{Aull*0rn_S!siT7my-bYuIqt^us>e%&nJ9@iTzo%+GqP5J?b_!IP^ zr)>5WAO*tNmrOrnTZ6p&cev|^W3cX$({ri*-~+Za>xynGHT!Q|A|(M~`MH|Hw+ys> z+`2W+Qr~|A<&mpnJYvPmjkyu;pEJ^;A?4B)yChL01?+&;1@VtLdr{2M`4t@B_XFs) zO>xPA<>fMi(UMpUX3)f#nWe0Iu9-B?vV6tDJ#!-#u z`DP?cHe#I$-u+K~2TKiVyn^3{~J%Uq!C;w_09z8$$3v(ux-`(uti7EI&g&?*o^5tr7(VkwH zB>N22Ixo=jo_{98ZqosjEa2h!T)>BqZfn{HON?ZlLJ?6cC+9w&%qXkjz%xS|m+X(> zdNw&Of5K5HYrFj8LY%s38~uCC{5N$<)$)|SAJARbZRKN_v?XkEOhmzHlIXBE6PD7R 
zE6LT^@=hI$_Auz^TtW(K$Z<_JeAho=t%+v71kuUx&i|}{HbHC8y_LDr2bz;?R(Cek zmkcM(>oGmv)V%qG*~Z{@xp$LB3IvVb)7PHI;?+^AnoQ#8rTr@d4u^COMoI&x? zi%1A5yA*Fz{iUu$Fs|YJ58}CrjKSyqU$7oEQ!fHlsEAy@D!VG4dFK5y?PZ4TbRS?) z)!Qyf237mGN%b&l#{cq>5=)8npV(W%FID;aW&O4`rca%*r}uWs)|{I z<~qsNEGgV$=}I7X5yV}Xh3(-L7UWjB^5@6-m#tAqztD1p*-7UJ-Qwi>Ij;TgFB2?V ztV0KR(i0~9vTI8tH=j7kdCOfe10bafzp@y61rfjOurZ*-1G#(KdEmL|rCgzbh?MTQ z???Idhr^jqzDT{IM5Rk6lf)$LyvMp^E0TQT`mo)lhYC(LS?Nh*WbY?h*PxVA#WcjA zy9!=1Kf|Wzbr`zY{>p<7@xPIC$<>NZ&zNqCy3}*UwrA#%G+UX({|dCjY`^*ONy_!Z zxK9{nST>WUWpARvkd|$I)XFQsl>4dr+DOPkmAdT_-7r=cbC`4_FD^W=UVy-onKtM) zWlAhF-4V*n=1sM+8D37JP4vR}0`0O-i7fb2N+OZEg9&n`HU{C5MZiZmoGe6&IDPg7 z|I{Pn32RtKdAYI{&{6~f3lVld#{|wPC8BRFHg7 ztFIDcBJfM%$!c=u$IX!-lHZSaW>N6fRQaSItaI{;9Ss0zhj{Zt)X0{^VpktARnCx? zGkqmHed^mUY0X_+Z75`oxB;Aj&T30A=Uu3zv;90`2^5}-I&JUgn(hEdoV|+Q@W!d| zoyrdFofGn4DzbOE(gEhPo~z_@0#Jcgdx^{@`#{m&v}tXk#ui;we!cWXA{JK}jhsa^ zTbCdCizb6Obfx>+DH=8#I9_P!tn>lx*oG0 zscZ5F)W*a)U+4go2__4Y&?l=6aq;NYAFtFh;gjHAkfh2v)|gFCcr~He9(P8@5L{xb z2}}lN`DlOsVC(%f-vAtb8ea{@s47@IQcT>!j(7nbPz-l~9}ur%&VIzcFq+ktnuL87y{fM!^Ma4o{)f zo?)L}hBN+3As{+9Y5tV0(nmH>ynL*byc;pW>`ME`v>QBsz=U{fhSnF>2X16yD4hH@ z^qCVg_a!E|)0z9B5h67kyFd)3vDJI$Ma@qz=Y!K%sm9HK!@ozS>wze%E+u#yHVsG!h>A!sV2LBKJgR40(*ixg>0Fn z>xnK!ny6eeToMN}Rg{IqpRzNtzmmb8uEVjJ;j0JYdM;n@MOTI}ryr@SZHnez4|{sR zN$e1I*5k+y(6D{INeyudkSrgK5gXsQhhHJ}WMH?o_^(wJ%Jjb>$1HC4l%I8kzMOi& z`bgX`>f5AP56=tImWXZ)maKZqC(s4HT*!-Cu z(6L?&X~P_NN_Y%>Ydv`x=rS63!Yy}nZFFyaVGrhG3A;zcf%$OKfTf4$ao{_;@Gz19 z5gn};z9`QZt{s+k-zI7SKiUdk=g1t)psl9nQYryts!{iHBg%jz#gzurL^w|qz8=S3MgjGgy6Z{^1aA`fi;&WlDKrQQ zT9QiGbR6bm&MGKNOweWo+{dR0vEe(HY^9H5GmAhOzMm1T-Zq|L-2#?UlgGr83|->S zQa<7x`{zPRw^wcIOvxpA zNSH2Dz;y}ahQ9l0Ly|`VuKu1{xqoAU*l!IW0qZ0wzw%K7Q4>%Wp8D10f&;JW_6w8( zMK#CgkVs=q-6rdEP+7a2!rVxL>prawq`w4W2ugZMhc`tVyI-rrhsA@XgYcsRtn10pi)=)xXJuJoQ8z32IX|TgoF^%b33D`Hm5HEOJ@9TuG zh8JCeIXm1;vGs5;%r!_GVT;LCCg|jlHBfoyNu4Q|q9eHRr7M41BY8?<>ujH$6!hP> z;Ajc47Rj2;CVs%yb#AV`>FIX5)UUPcWPz{U;9j-(f3981to&xnU;FRUj~~F%uL8AR zT>dq)d{Q9Ll94LUCu}sCpP;Ii_hq!o-h9pFpofV{1szuWs&lB%Fixt4xgCt^{t}`q z`T!TA8(8Xa^nX$TS2}0}22(GE4ul5KzgA1215ezhsYI++s05fgYvgK(wWTg!?VdLyn(6RLNs^ABk+s@_dCOe=<+=3 zsmv?U#W$`tmDNM=H0o#5QSwdGpEMk-r09Xu|ardN#^n8OG+?M%@wMGWWq7ih0 zA-9HLWN$n!hSg!$O5MjPoS!LqGf1ZlFTf@yB5r+b|Kj; zGWyovQf(4v#a*h*^~czDU8U2g&yM=6 zJ(>hJyqrEZC_y^ITtAZjTyxw!4Fv$v_{WRS2osXrH-?4L=Dl!pI#y@yg-D8yzxu{O z99ng<23(1dPVNd8ZM>`faX*+d!vnqZVbg*Ca@X>08G#Gs+#^LG+B^jDxGZ(4hh6M3 zyV9%vr73>EbuE1$@;dn`b9VCVKG`!~@78=$&MGxtW+#?>lh{`cx4GYq{nFi8{Cyn3fY z{y{yHIjnFPcm1=mIq~JT1`_=Di7V{GyK$3Dr16I)j-+p2iUb48Dw?8PfL@Fox4Zr>+%JD*|%LweYShpXCs(AT~!qzaH!WR)y&0~ zXTUr~ux_`_*9}@vOs|w(p!+TWw4v76B65u!S`}c#-tmChD#~O&;d;khKBmptHXQ+z zYiHY*ea^_zs%*yTWifuz{>}A8trzVR3nbl+&(B+O4qoq07za>ueXU-sg_W;b=R^^E z(Gx9@I$v5wQ#!;+L1on=tbAKlhbW%9f+3S3#LLOur_LRE^$(f+KI;J47U@vG^Om5&*&Z2yYx7#tJs&!dYfELgIb5$ z5Vf<^3ptP=V04M@YvH@6HWTKtJWP?0TOGZIuLy+8n#B{Y$vTg>W%+W#vw!FKLnCqM zdzU`qUoBQZelp%-<-gU=`LweNsFJ(ITQ^ydf8+z)0zmEKl^aafh;jp(_@)_7ervi- z%w!FUzv`?)M0lu>8?S{kYvbn6y-LiJ)@#&vIa=uWb^H3#LTVIqgdQO z87lk(4C;;F`>cZD3IF&k{#*-N1uSOB_2OH0iw^d7wB8ejA$|R#g zU)KlGl3M637q4ctU=;~2QtV@6RXi<|a`Bl-6_u_msU6V#6%pUz2rD9s4x}wYZ=H0!otm_!Se)Ye&v*_CJ8!N`68+FQ4hzSmZH=IqV*|-KdU!&XvauXP9JMN$_ z`pWPXY-cEZkFFba#7LQ0-KcTm&&W#ztm8;bmAcn}oPkJ00j)cfjTaj7T>E}C56wT7 zy{q;f&>KJBsiP9e+`zG|Gaq_1bLDS(Mu*mU1=^*wA`k#Z6#92EqW0lY_ilo1-F3Wy!#7$z1d2yC;(hS zs_0ktclQW^7u9=%OJUk2jN4e*m0<2FoPQW|(g~q@_br-3%nZ#2#~0^wsvz$rg+75V zud%G6r_6+YPrm1M$YIo(EP+%k%KwlOQ_%pFGPwNx>zG+&2!5B7I5CFQw}U%!iQg%ZT|7iClVbNMZyQyWr@o#G=HMZ)S!JY#&GI0|Jjinn{Cz7XYsihMjc(*9kk~V6Ksr`B)4nMVuNue 
zGH3kST*CEIl_*Ph<;471yUSN+i~g&7i>*!I3SK~9ZxiIP<}m}7Y6yF2jr^ti?pT_2 z7{1)qfrD>xovjZ(}>fmph!<%V|6hBQo^DmP%&W1ky1>`CdgU8x_oi7TY-_7 z@1j^|!%yT80u^#6{x!Hk!iNv@aK(E$Gb`VFDkyoN5?Sj@*`T2*p|7XNs*{o@P8U_wYo) zIyLLTj^NB+Yhbe+(-NK^l;%ZL+%JV9l_^65^!A^J#!bvg+urW z7V#k_yVXF1#T@uO?rgbW?dl~5GfU#9S9`e!U?R_O$dsG_O-^AfC(Ab{A1Z+ek1KPh zU4$9$vhStoMIDtPOvt$`PeY)7u+%I*_)qOsb62-!p0j4xayyWJ1^X+aq3o7pJSS?! zk_7o2yL&H-q-rfO0)RqUEXw~n%cGyvUuE1=ejN4t{l9d%+=t-M23C>Bo;dC7cY#a5 z!=&njJ7xA&#JxQ1rCY*%r3`021{BY9?(W{<(&_V+Whgs&tGOicYg*DL^6^5_G>hZu z?-?5$?ZJSc3_ef^jUu=ij{GnPQA|O`N-*l*cd8dWA7eyNzvFu?@9g6vm^k!mTxuE-xToC>B}2uUxcCK<3TYPP2cP45w~vey zT<@upWuJ}=x3=s_3-+ zC5HE3zWXoLz9;|pv;N=D`hU^-e@S4T^XYC|2q}ti^XwrZvu2YI22NybWi*glM8Km) zaIT$2{BUt3p711v0B+j!=E2Lx30q^Wm2M@R$j%l9gfm?C^P^3{5R&Nw?6QqK1qRE3 zG|2qp&^l}ahyiv4y#~p<0(A`f>Qn9Or*xz;;TjjI3}l@h+32(?r5}Ne5Yn3nDzc(h z#@oRdW5T|-4#-$X&TOY*W37V2Fz=9*-?(NMVLpc3lIG=nrY;o6W;z!*C>?bYP9g0B zygAvfX-r_Dlu3AUU!F3oE^~g@>*CgeYCPFr-22KK_g?+*$9`)!*4DJRoK|&6EEZDB z_2;dFoUAhvg~?9krh}hR>K^g&@ev%y+k@O&o>6}gpRbkwAU?UCtH}s)9FOT}ooyC$ zrC`FjibHw13MNMz&@KmV``UsBvlIgWhB>#}6aaj>xRXElVzMb&5Q#i7#xDOK(vAlo zcm*|qkKC&yV$8`>b6@ZV(Kl*%=Ujo;Bc*))coJ=mTs8(+C)3VJEb&=NA9fp1`Z7#a zSgE!{JlysgQT^I{u)OcYI)8#v%7%)q*{Srz!#I&qKTY{@RNwpGSb_VBPuil#RR*&o6AOU6SBo{I3`9Oqu@{)j}WT-RjCowtpbA+M!eIfEULt9-> zUG+9^y%ugQtt(j%8L~R4s#6)w?<;~zzg{s_GaH;(c$ss}R;Pnc@qx|dZqcR3XGN~I zJepA?VbuHUflg23-#gjZBT4cax-={{tmz8TW`AuDpib~kugR_M1`MG8cv?lhSv!)< z6`Un#lQlNem-(&9GGF8^ONA(_ImxyEDVXU^{V`R&?8= zc8%R=2peDz@|UR`m0xFR)YTsEOjY2rnU6mvpG|umK0j%G(xSQ;f6C1px38dR3%FXp_W!YMhQdO2eIWh+-zB`{N0w+O1 zONI74s3op=gYW=8W+*bc11doFqLdhoc)%~ROHIa8F4EPi?DRS+iq6CE#$ArbSIRf_ zkJkr9^N7{%@?KgXeN)_ANEH`#Gezws{(k2JiMNb~grn)@lq#+dZ7n@-(DLVG9=v}{sc zhp6Jx1K-zfcZyYuT$L}_)2SL@^aAbe25={ag7Jv$kP_?Sc+1--h{$;~iSQDg#cZmb z`^34AVKn>T>beVC?*dKS?XX^QI6x#~D`bEF6_5P$#bOlg4W>k~cdZU5oLAN2XA2#g>dZ&{ zqnu|ls#0eRbd)5DQrqQ`odkz#$_1Qo3&v{;I+)=$M>D-(s5L&PXW9-{lOv8Du?Ci- z{-%kSypyhkwZY;H8!w!AIQfAI`t&1c0lb5O+Xi=2I%yFy5 z0rBz2+vb_2t8`b@lFnt3UFfjkq!^!|Sp}@qNJV^BC7jvi#fs%lWyW??U|^E+qMvrX%$E$pWGBnX9r0x$wnewuY9qzUQS!J! 
zdGIn31Mn^eql%c*v~RJzqgSea?<*WrP`>|W>*S_^Fb!M)N7kwq8{ok1*4)(J9xG$F zvpcYEN)+cpYe;gfwl+J{?{&eaR8-m5WED-B6@yVvaO z#};=fMi?v2y;V$OX)Yu3T?@wJD^t=G6ANyL-Z9U-bl`jSRXr{zXukH0LhvDK7pGNc zTZivL=zL7C%qlmi$=Hx$)x@a}5Ai~199rB(9nJNX?_fNR5LbJz89Pn0TO zpDD?*b$u7eK{c9UFrg0s=U*Ol*wp;vWr3PhoZC>zB)6HfbI~sjT|b4UuNwz`r$`*V z3+98yo{`@3$=SK3aOvZX!Lr#8nG@>wsFc&PYV9^n%d$zyYq8Q39b?kQ1kY1UzSqr^ zeuy&@1Z2;Fs2@pLZaN>1N}AoM39ui<;ZN8cjwaT3HvP(cIJn(|rM1yk6Q>nd%?C|B z4eOan$wiG&gkzPp@8;)QWjC2ur_i3*;f>Acz4Iwyw<*_AUH?}Wt+>h#U3{fikTRFY zJeo4)XE?}{`<9EA%S*v3uchKr#cNhd`5v8hf1g@q0V#0BbaA!QFx;1rtv_Hl#XDfg zS+wl=H7m4w852oazgn zPUd#lEm?F2YJ5ymt}ZW(j4#+CQYkf2z}=sa!QQFyq`Q+1CHT?=&~mbM&`!do8h<$u z+t1ESLg<{zdb&tKo?MltoT(s4T$IsLO+~d@8YO?5**mO`Y7j`mtse*lR!WJZS<^oQ zK;vUzv*vPxII}tsT@P3`w@0f5*mc|cqSFz&QTp0Q3*?KWsx~c7QP}0a5{^hi`W5M0 z=BI5A$MPGRdV*DIx2LO`1oYxvL43PMJ#=^anXdftF zTqX+o$4T=#HQ$81jg5K%&uWK!RfAjZ_ZGfwhyt6@T}36UEZoO5U{aoYexK-}S?!tr zmMG_2lEHZ_tW)h`8^fQ016h+A@6S_F%yQIOQcT<;@ab6H#h2!=paBo{X}C3ejWWk; zniAbeW*inV^3+G2^hd(v&tGdqw}Fx&IZqiVSMpu&aTHfpMe?c@+E7CPF50H+(XENv zK3N2rqtOUblAdd(nGhcR_rbI2V3C_Rrd%7l3q(&yEC5tvLmY#M&Hrqp6&-qRdeml$ z)r@x#Yz5JRpT|(9l73HP*V1KjPSd~QaJ#=$-M9bRM0c4tvtkkZ(RlZFRn8t3*)4lQ zjSIb|4)Ps(MeE&V%#N2Gql{Hikit>I3oJ;&_abA2g zV@|3upZUeq(wBf-J~P)Thw{S_*^;T3y*AfCx%fyia>bb?BS2(q-$=Gu4#3g%n)0Ce zswSJ1N6u8`JQ{|QnB0a8b(~kx-1%WZ_ z(euIGK;!3ihYJDcPSyB_2g3z2Mf$x%Cwq!HfG*+GA=`#+UP_&B$$EA)YWqH3m=)-q z(I@6o<{(Gq?1$~4@)_^6}Ed!ymM;w(YFZ`aGp6eXM-$B{}1#Y;9GEDciE08N!j ziAUS|!7=FO+xwCoI9unR5TfuIMCX60kg}8b7%F`~G9)g@(4lrO<5AH!MMMLs*xzl&?*!q~tSC#FxCtDIVok}{7=YrB zir%{7F4L!Pr)%VJ+4+%2k%Xf)ZF*jfSy09YPrWIhaqC;Ecp@8>UfVNP;2>Vgrj9&t zqdgHEHPtYn-pe{S?!*eoe*9hYyt%owR#P?DdY!WS(gZyb4<_P9x8)u!!jt%YeO0Yf z6tYJkkEOK)mE)BYiThon$w}qUW~loPUzZoQ21KoK_7M6ui}QUm_FF=KdG&SOqIGm{ z0Mke;t(%iQKW&cEA)k&=ILaP=pdq=i~%!UG};#GGM^4cu$|$$lUZl zkBzC`Ukn)@Kxi4>e^ggLJgU0+T#~p6+~>?^bjKvlNG>NM^8(m zyu?B7aQ6Y~gU%*HqpKB!4bsXS1^U4@P>pU7uAB>9HNdDW7Ljcbha_ey4pt2Lu9a%e zaj#keB{9EB6SnVgH*!K&9BrB>&QndkA$ zwZW8fZv~&Wlo*QQp@gs<;Wo+LbbOGi+6ZaI&nCjsChQ;Y1V#On1EK8o5xc<$%N|&1 zZKzi%Bs90?3;Px2h$yH$jpC*x2U82M!=VPVvqY;&( zqc8Te#P+7TmrIZNcusB_UlSvmF@3bW3Py*md}DAU=yjCG2v*@$mDOtgg!6VKkhzdn zy_#@^rE6eHb8WvXw`nt+Nk0UZz>SZbC5hY-)*sm$PK<&VsSV#ajSgh}JyiSFYCmk;qDHEtrs&dgYzY=QNkGAxz$R zEb16j*M1SyoyC0)@el0R8e(=!5F6%P@48t=T|Q|LFRXZoR`{PF65bo3b#t3#}%$>Hh%$tT01#d~qCGwCiAgI}HQlJmvw z$1qJaFLrhim&ICHU9lZ;eLIGdzdV6wfwA^wtggtxBq_Yr(#1^&CM4EpvZ`@*LYBJ* za(s-Cqoh*U`u9W8#Q^yrlnN`WNkn#%jYgY4o8P-De1q_W)AVI#2h;F1!?~lGa~bU; zMYqzka-YnaY!9iql*{s??#N#o9=1~RBG5_yC8cGLWSe|#o8LUAS1f;0*jZ3h#9?}r zDyn&H%nyoeLkBi?J4W{I6bFa2D8xfwPF&2v#quno$F?q|h@4DPTH_Wz`@zKAbG}!6 zYe&<&$_LzWivlo6D`9&^+*$ra^sWb*cHm{INU+Cdepj?%@T2i$vWUl5R3BFjBBWL_ zBoa`|gYyEcsYxj{m>?IU(kaF2uItas=Y1X>t|#=wH37}=?9tV947v(XCTKB-w)Rx1 zzuml9ps&83cDDRbhd)%vW}Ztpu{6;CyI3_neOuxAI2B$OY(`Pjd2I{2pMIG}?R2+3jjJKNq+M5y7StO0^und=sK(@VdE33A=jCj>L$r{Yu ztMT?XFrq*m?%-nLU+cR0G?On+K4~(zV6g*tC{)bSbONWPAyx9Kd3WlXndYkh^r^z4 z0RFF(j&&r^n^9Z71Xa-}hg2K|I*fI`-b>-|>4#ZQR+*lapBMGCv0he|7$1pj`RU)G ze3JxPp^C8)>5UVUnOty$m0D3O#D^DT?(;~Y!^jMH`=J97hdu1mcVJK=d5NY2P{E&| zCns>(`W!tPk;B_5*X_6uXlbW=I&X{KIT*V+5x#dyF|vpcE&#TmyM2OK>-WPfJ2fD& z_kzXMPcdNj**!PsC$Q6RNo)+{h-24?D039)qJuGU9IajvDmHvv7215RSy$M@yw+{zi24>J?|ttarTe7LeCQO}rS5{_2wtF5U8 zDRr{g5HW=3JgfG)^JPK(={?+Tg9&Nkdiapz(FU}ersa_h&jx-AfE_hdcaPTqlN%2l za#8&-&Oo&I{pULZ%B)d))mRNZAa}UzgFg{d1`$hC{+_H#OPKAd$hs&*gMT zrE0kxsr1>^e8&0hkA-oi5p9)(idvUyQrCR>_38|^BH^g^-Ny=UrVpti5g&H@s%S_M znqJafdBsS-^~@w}`}f)l%7l^Q%dlTWokX1~3CWM2vdpEX{O1~*uEZET*7o_am-$BO z5ur(wYz-z;ZE#d+9$DCa5wYmRkaR@&K|H3MTlQltAL`6(IWy<)eqfa)&e}nmgQId@R6x|UHQsT5kooHp1sZr1V`QP 
zqz2N@bokW8brq9$*C82yHfd>$a=5kT%0yjP6uqx{XP-$W5H<$QaMVbYL=!XJ=EtO@ z3<`6J{g1{Ca64ML<*U9;}-WF675Vrm`FHqeSipQ{u)Fh)?z zQ3NCp$OWfT{%e^(`JFSrdRUM*$g>@WEJi?@gmoIvK-^(5S07@rC`o9{v-lzQhsDn^ z#w^lF=bgOy~M~h9g4ydyYD6gwMh#);2YgjqI(y(>m z{P^F8>b7+^#jjb_u44@`G^IX9%t}Wk1U*sf~mwMqQ-v-XpsAH1ic)D}6Skd?MTsbGx>O9UHOZPvp z`mtMTKdf~QT^uU2nGAW?+xduio*iI+wp6XfYUxo>|^^27$=7c^-ZLV(M9{CP3zEvG1U%rBC ziqAn4-N$p#wYWQ*R)ou)PqzEkYcGna4DO12Db6T7<9vZErt8246_$&+PpcYo(xdVH z?Fb2v^Q~SfN_$Ylzj}1!cUe$J5?m#ZObCQ zu%Cg@=e)$9yxpu~UV9JFx@9yl0s){F-@!_g@8c%#Vk~Tr?Iy7B$~&9SnwcvegWIK9 z>=bB=*;E+naA6@d;p$zaR-0hJ-qVaa`55-yLbzI!HFaoJf^7{QtOd~5_MP=xy>Zay z<=%C4o2uHSEovoR=>a^dPK1D&&I!mEENZ}MTT>mJ?B&lmS6Q4+GLo} zc&ntAI5=e~_IAA6g3c;-U1+_G73dv%#r8 zTtMs8-{U)y8VIIy){%;HdiW4O?%WXQ#LwiaZW4MFhK1L!*fd$o?^rsH>|S={M85qt zmYoJ*(W1k$_yNOQo%#7-UfIV!NcGWhegKg&-U0!S>g^MXbw6CnhISb@T4|Y*4Lydl zqmCJU1B1s+<+>KF?MYd%Jit+U7>=pn6cBsLD-82Sx*aOb^W2kb+!~Jy@5-Z$e73%w5s7ik|FKp`8kkt;oD$^^(ue=Q{kG; z@-Sj9Ip0~=Dl8%vlO^wYr^`aiPH1?Up+km4`3y&1T;OQ;crRXSWWd%f?L;Og#hhry zIAT41wbYrOW8X)_nC+KL?n{A{@R&JB7GG}AfvMMMw)j6-d2Z|mADAx?+*boXjNinr zMCWc5dcV*M_$j-Z4K@7E@$pd`cj5ZbjuW{7CNhUi(fZ8km8!$qvJB!;3wMu*rv z6L>r{vux~nnexGClR5_fmskKrdf=ZT9bU}o`z&`FX5f^}(p%dt8sA`j8}A@8_oSY* z)6FoKgE#7YXickgDfjkJaJW?FVp-}tAf+{r(VD|+ogAHgkFMT_FSkqKE(e5|jzx?;=!yE8q zV3AEY(Ys=v1XQ}f!t3EaJ7@e!k5jQd zDNp&^vot^4WB@^D9PMvsaO}jZ8L{ZbuzeM$bjpp6D%y%?{pImA+TvJ|D3-xYbp_Kl z{s9-g1UpR=bD7I9a4w1(rm^Ph679ZvDK+(PT1@L1= zc6oPR<8TzEoQ@%6K5(K0^bGBwQx6n0SxN;{LTXp8 zV75i$qCAT52X)O&wi1$UrlJu(l2=QR%%&lYe~&L2o@AXqa6?BpV$GEmFP~2p0`63d zow1aq@~=HINbNY{_aJuN@9URwFSpynnmYBF5M~l$e*AnnvpNEftvh*1bUO-kM}Y*$ zk>QPT8X5B?@ujVQl8l7ER2$ACjUXac%`2ayk5%X>wqu+_`D!Y(II_qi1sW)mrKVU! z3XNxX<+Ej#@?feDz@18p{xjIowB$5(uM7WllEo{5(~T}Qr&6aERslIKiStUsz%6ZO zRJcJ&w)|(S^Nt=tjYkCi3p$R|eAP2qj;dEk;L7%{%XQ1DlTzdpCXw;$lmU%UtIVeo zm7P!(n9qw;J3+lW&qnuVg55XKTINYqriKwfCrVT#Bxz?joxv7Hllmp3k=|zaC8Fgz z8MYqZyz(nxs1U0$w3kY!F?43>Xte}x3M!ba7KO*JK^G8ZdDjEQue z&I)E(P_0eM>2=UkPZNb+dh=saV8eFfJzF01dwqOxdjjt+`8?6M{gh*-g3CRcJ6<2L zu@PSJ-@SXA?8)E%@r>e$&uX*DQZRP=$(`oi_?=w-L}YN;e(o`H+Z+r_UGzR@me6|R z{*O1_b<}0eg9wnS?~w{~z|=GAzojZU44WQU#^KKtV+Y=^haT5hO%G8l7?|N*qnG!6J@<7#+w<-H^#A5&jGSxMxz-W;@!R>%D!COB zf(r|gsf`pqVg;AvDT8Xkvpl8;efM&0C2N5rQliNqo63(j-(1gE^SHZ4tQ;Ob(d==N z?VTO^rknU4!>hmi>P?YBK#`+UjV3Ar$11}g|G&o%!Ye!{B~0Q>#nvIio(*TffsO5elbcXT@04^+rxA@*ybw6d zJx47pE}#+|7%14(-xz+}ICa2wgAH~$cn=QUjhmFd=Z$hzI?I!zMF5OEu9}XPl8h9a zwXtFGv@U#KVO9>#abDB?P~Tpa|K^M*naD+rK3LsowG+`;FOK14-qi&@xC;-XP{~tw zdQ+}ld);W}hZMVY-!sFlZReWKhgg+2)&j~GrQScCiU;OOh5BuG14gUlHaFgQE7}WjDCo(DDzy?02>+1{ zQRLXI^r9ym5b$o}1 z;oPU`VX-7(rxzRFQl&Rk?q6W|>ziWb?^P4qu~<*YQN7goIX;HsdvcHnA)qk%(t?tS z)wj7N5c;^VwSz!A(hA=VF<2d<)0h}LaFuJc;go!EFQ8!_y(Lc5ZO|eRjut3GORG#V zRRoW=?k{p-S6U@QO zdmzoP*7AA#oGR_K9&jrbSi5K3v;*!n-R+h&X09-4Vx$UpbR-&HQrxZK`*td1aM{j$ z3EA+4vy$c>|CJ9QyeDpr%ubY({=#zlb+%?2)`#rm3@xv_MEz7_q~^!`I^2jLukc$( z9-}!jSQf!%W@6MRd_zHqSFxTSk`g`GM9we8oS8FN@lC6+`vv$CfCxt$_;^)1;-^>A z2}q0YWxXC6Yq(SP)95@@YU)+#K9fqD+0DyhH~QD)h>?%VcW}N`k3E*&TD^ zSX#=;3ZR&Ag zw&h7?PwEsxziqlxaO1rnTP+pd4jbVuOa{^w?}-|hdj9ZEukornkGye))+7gIeyg<> zAhBowRnZ)g!HmSV3X2VfC;>Bp9E~cTyU@DRDPl)^0H7(%w~|v=Khe(TYXDKs*VORJ zr43_%SA$_cyp^qCMIdFR{nD9KIS5ID`Orlo%7^D-kJ;Ug8cSZRlv=9v-#2bjaRN@W zHvlI_%_Wz3HKK(US;n>NU4C6>vOxWV zQ34>WIb~rBJ&>Dz(rgvy7#ycgqsJZ)-T55({F017)lSJ3|J{gz97pnX$1D+JYA_5Z!xC} zYgd%dJ{!d~dfjZA?=w+@=V3$5-0zaF+l+wxVV-lo1Z{J4sXg7fWZ4ovB7d2ltrxw6 z^eJXMnX;iz*kEHLX5(?jXa1Js=4uIWFqg`mzQ=0!sur zcn)n~rcf_za|-q~e)+~M38uqR^jPnB(+3OoBDzViSnS~hp3XM4pk}`1QQ7KS7h$i^ zH15$#MTvZ=60VRe(a0S~0)#&9eDZt(24xkA((ZqB zQpEQl0t`@hI?Uc$tflY^!e-$S;tYQ4%H{ZhbED-}Vc=QuQmFQqeZW5T2|^+OZAU3A 
zp1WT*=}g3?1f4h-XU>Lx$X-#;h4_3sWO%fNA_qvHI8Y~f;#Z}5ki24ZtdLSOV){Vy zBs9a7&!teQBSyd-Y~tl;ExuUiwV>F~I~}dsUZk)$j=;|()IfBal2tup6uQKoh4Vk> zUpSd6fepArh;C<7 zDP*EkhJG2$v3siijX(rjJQ9`|T{~E$40-eItrulrG89sn>VqL)MYVq6=WHvY!OOAi z@*^LrMcmQPi^I$2SpBA?*rrS!b8d^O*Ws`;G}lcNHpcdRI3%#Hs712*IT7ARY|U2S zG#=n&4fC7empQh|z%p|RRka$8GRSv|qwfs!g{ncZrXFzSP|m8Y8$RjMbw65yT9IzD zqs#TFu1pV5i#>8oZ4m#C^BNUH%k^XLxh5!eO8TM9<$fjMu=@ zNFBO#bQj|4ICIMoSf!tWX)yUX&HAq^`cW$DL$&6eVq)xyUsDf_DqLpQZ}fxW9OXMY zanOtm_phvKrE#Q@i(dPr+%DCP8V*bB>2cPCaW~g3y`LogmApsKV>RgoY@cmm zuYAsqh5>!Ifb0HvnWdU1u_uW3kD|KszXPIcGWRF+W!M(v)gn`dQm%1((1R`FqEcht z=%G@z9bgOV17Z|Ugb&#Y?)>2Ur-r0i&q|g17haZ_3=X6ZotTc5U-^9fVHykzkxx8z z{w(Ye^xUM`d&!G6k7iSsk8kU$)*7&bA!0lg_vshao2n!i<)>(Z0%h zbLIZ)tQ3gu$(xfVFq)cr@{)b3wI!fA*AV{wIz!wCSbwA zh4QXelxz77i%eFmk$KJSdaA?Ku~rGO*Ny!ywYt^XRQ%90x0*?E@H4%7A$2dIeRK7G zx|z#V;Oz=D=gmYW-g0ydqBm7*X3pm+3N){sE$?Yv<-SwhvBnH%wa8h&|!?W+i={7o3)y5=sw%Z=QhHHmQ{S}Cnyt+4&a^K6+bD%6p}N~ zPM5FcU4J+Q%%1!9)#rAvmhD=uaF^jF5N&T8&!MXG#XIguX#0g_969ApRIGiH9}+p? z!}t|q=T#6k`|IP=i5v|znFey0@vfU<)znEi*=|OjpZu>Sj#DT=@9*q-k{5MCb5xP6 zQRzehD#3?$!cmO5Bb+>a74kX(H7kRfgq>(0?>7v0q#=CXI-%uVmNV0<+n63`9vsYu zchyKbY%rML7vp40a7@bAq(M|I?VKCO#>GxDXDb-hMJ5Go|6W`8+&pm9^YnOHMC;*iP2XLgS)2 zxROur0b;Kr^gdUw)O5~_x?+;uZ_t$f(RA@Bw{h9ht*4()4In%Bp{Mw& zLdSjy#-2U-B^Z;#Wx7hb3!ky$FtpI+1IR;q&bvBbI}`WvXmmN7HA$tye%E00`rO_G zv=2}CZoBLyoO~SZ3$kC#%EG)4wEcu*!4%o{8M zn=kIqA3QJAoAxir_(0hsAG%y;)rhf6%cTySMbW@L7xZZ|wtUJ+=>?m_xp$hQJNag# z>6Bp1bOeDP_T5*6-IFd7Pv=8qs6_$7VMlWf-lAa*lWfR(KFSOX& zTdFm)(2SQ()R(%7qUXt)*O=RFEf33&)~l4YjJpaH#`2juHrd?O*w~!k^q}$XVw#d6g$e?j7ZxUBtQCsDXH#5N0o4;Q8Hj^Bb z2vFA;8j>DMtg?FNtxwWS=|)1$TIAjD8GeN19I$5qAn79oOO1w>)^Z zR$&cM&0~oU>Z8R#QmeY^h~7X;+;g>Yb zstLe`UzAL;PL^BEYWv4vO$BV^4I&`z&eu~kV>wbsU_ls~*=Z4Ktc4Al`+T0~XHsaR z;Ne^YC`RgSaJs3V(C|(jOrGEf(0xJo7nAY^m)0HZzJ?v&yjzMEXm?(qe53#a*4#pj z>}y7f>Gk@N`t2D;edXUq614N7ZC~PJy(b;lp#J`pS*d!hnKiVVlKLlP90zwUcP2g_ zl*si8S~*OXPQ}??u)@j1Jj%=!wOC!#Yt`B5mniFmvhAKW72X6D+%0*AP(&#_T7UYW z;;iE+%7tV2Y6%op9mkh9E5eJ_Cvxs|tJTFUWf}UmUVpXWJHuQhy9TYYvM%J>bm!m) z_O2>grcEJiSm|I63gXX7MxdTR_2L1WhpTKu#8dV{q`Q?Ln`>x7T_`Z zIj^HH?U220`b*rj{OaA~QtkRPcs^ilsr$G9-%?7L_CiiH`zJM)-u<#s4bJAK#?8rgsJv`168ee(G@9}wA zQS+Fa?6*pbh{p)_2KEd#Op<{J?p4}&C9?c!GqN_DBhoz4Wh<~kbVIkve{aAtjXKvr zW&I`R=utNJqwZ*L6;A(vgeY(s+mIBAYhB1WKF}?hld>u~Zh}~%xd)BEwIe-d!jZ0z zqvwf|;a4JWpko>&thfvv0$&&HSb2+dnm2Q2bwx{`4_Pg@S02oxu6iFvYKEbVFW8Xl z!m*BH^W;{`(}65Y(?k`8~CD;A6wKMvrb)fxdbYkg*56YlY@?5+|QCDCCFmjh|%x7nvPzusu^;! 
zK^ZIHG7su`(F`qUd&RV%A znk?R@X&zHTRB|HAeJifaNP#b&BC=W^41>g=%xRcO(d=zKgo^cIhkb z$8jM-W(HKF-p86(D`+*OhHcuY0S$s<6o1*j4->&M>|q;4G2`6k-0%D8A^v^}T(T)Y z7ydxAykM6BvUX$ePfF0s0Fq8~EOEmcL!Ufp-Q^PKGi;ZMB z1Z7Ej+tt$n-!GR(=mjwCv=k zg`3hONJCWac&L*4xF@o2RA7=hgpjVvX2GuY=N~~e18DdO^Qzcpi3rE|uEQLeP)X3| z2oR`9B{7nElhPBCaPpF*Lc;LziB7coa`Z!TP9cX=7QZ4CMWtPWH3EL23e4ZH6goE1 z`OlPWtToNjf*ul91ts;Qn;PDRl>9GdWja(#+LQEGP+W12m(sS+H;&D9ad9+;-F`jjLHsN2 zif@&sSLnjT((VSX_$6CWYY5fMHd!AG6AJXQXG7R=m&w)6j^m|h?&W}V$B8!!m0p*Q zVC~jY>+;ti)+YTv6TYz!Ywm&Kgq3r&fxJZJ<4a1=mUrBG>69BeLYJ%SLn=Tt=fNoq zJHeKZjP1JB#GPTIR>)IVskt-_3ZH}}D}7r(Zqi#5M{3?`B`=0bJJgx9#{I-=`#>;S zv1`bbosF~P&y8>>1I8HnFQH0tDcwKR82PVo#;zz(!#h&F-rZ)vh`OA7IBOZU43PmH z+i36H1WcGk|41hZ2T8jD&6Wsc9;8h^+aF&e+Y8Tc*$`~$_h3X=Hgw&l zSF1D;HHH#QLr(uzm7&+=K=4;`vPukWP>ue)*sZOMfol1PDWhJ9R|sRc@J|~7V&S1L zL3%~PmEdUC{C%{8lIXD7V05btxy+)_#3Mp%MW2Z*6lu1xb#Shx$w|vMM!Ku;0Fer` z-;2Wj_If+VHrhG^z|VA;;yuCVY*woR{A2AXLBeyXn9rBY5p1& zROd-V&ONiSSF_$2-o>MZbf;X^vH>lfs4GCGLP9@MHW=ByZM6~|&2#A4XF@3543c1C z<^=0P5n=n)@-mhC;|{_zhN65c)bu#{p#mMTA&?gc@vibmiI@p!(+bUhQA|uxO`cTW zp+=CSGJfYH3m_i>N4>eQ%@R23m9fz>5o{`i2fmO>(q)2x7YS|#6ZU(O`i>H!nF-CP zBTH@rYJTgK2v|>w6#K1Sx$`s2K{GaidXE3rtU3OrS$ot*1V)gW#J;{%Ko(N)be^~I zO>`9SBY&!OtA>C&p|ThX&`4J5&2HwHw(GlbN3PAg$vK8P%fc+Qu<{$TzU*_wMr31F zDbgo1sH<~v|Jn^&LJYq6d%ix+bxV)Px+67#y*H?5(-hJ8nk@W*R#x*yIfQ)OZYTe6 z=WG9Xu5CAn&|rwOWFqh0e%c!SNlo58^CK19|9Dw2%gczwxc%^q3cV`=Q(m^azaz#U zI=?o`SGnICWw0s0^8u%p$4reQZl%V_a&Pz=_w0?cE-xjLZoiforF5g@7qnl!SmV6c zcgt+@8@+0oIk~cs>uIRvl;j3O%Q7Y@p_TH>Um~-T_VT+-c_h>&c(K@2lcDdx>{F+5 zP@0M78?qIrV&+|>r)+3O4t7n`Xm@N>N*^sDm*td1yOSDsGmWBBk4h!qz8y8++Hp80 zX3(f+yld(9Rz006ez=ZH5$&kG5R>yy(cJ&oG?EyBkDap}18FW}xBCrEE%@Z!a9$`K z%r0;B9Y2=wK}{DDiJ+D@1hn}fq{+)ja1mVfSq;Pofi^4A(jd#ITt?14!h|bWzF6>v z)A!j}hsB68rup@3%0Nhm*fu38M=1C;Y3G)aUL-&bL$Zo2(Bh;=)AL=Z*h=#+fdjSn z>{p@84B*OEv_tL*^m-Hrl7h=*9Vp2pA@z|2Xvcq~6AA-G>}ZBB0Z{eeEpe*fr+!Z= zY|G}7b!YCN&ZhXG$DXv#JjyV1k)J+-!6O0*QO3`9HniVWa2p$$yMFzUV zlN|=_KN6e5VQd&qr*u#xkph+Z^CLhx#wEt34#*SDu~$rGU7AmcUf4_Hm^sYk7^z*X zYCW7TB>M{Hy;Ygw`n@FVt;_i*ftk!H0x}%g$arrcn-34W>^u=6p^1BhhlDRsnPn6n z4?bwF*`FXX`|=Gts6_@CdoOBMbe!5>gzNzYv3C&FB`r5}xGcj81%F^%o^x}xnZPY# zIlI38?_tI@b#?DTH1|&FYnGWmtUd^hF>~3LYSk9(qAi#b?q&Bw(Migfjy_y?N=uuF zH@)w3BgR;r{qap#Vx`1)i*AtfXuA#4olQ^Dw5gY&$}<=5Fws3|rL+b7U-GdB%~~R| z!aG+vg+y0Mt~AnS^prSCKq{MsW1tt_(tPnb>P6db@_)dfGR5VO9rse4Zf`AV*g060ABzf%882q|IzX z(0W8xQak@k>Q`jQ{Gw;!aJ>h}98)ldo%q~h38K|yv%V8=Yc`s29?aTOrcHF%njMQa zk^j1ZVmQm${(e1dsI)s6CZck_s4Ak!Sn6&L)mb1fnI85gQYPz7 zf8hxVh>LG!qxh}>m-hKI0wsDpj2A%chZ_0}tEn z7V3n+^j=UTcKDlX8lv*J20w(Wvz4uP6obD%1hK>O5jO`Ko zF1}Gm;8Z7H{7eC2G{#OyZ_X#N4#)+9~X>&s6PGar^0q7c=I;F}?~in|xs zsa`jrj=DZt2NQPb`D%93kUPkt^##keV7T(>QaU()sLP($r*4U>pdLq89cSMJ+%zcR29u@T#>dU zezIb!OJm@Y#)UtSK$)y%#d>wJj!_$W9Vxseu2m9&ZD1_}Jqnz_Rom*eHG+!YA!*Tj z62}i9QnkLsq7S<`xi=mi`V&D-O!hBjL=gCcO}=omr7|M3?6a zFsv}UM{eK_{3^}LEqAp=6z$zJYt_-hN0uHWvc;m2lh(`K5*_>17kE zgv#`+?aSCJm3&$LAgl0-)O40&e)>7#k?sXz1;MIhwOnF_ch9hcbqiv!5>aXa(A{m? 
z8G5998hV1R0<(kN~qV=~)} zSnN(-Ds?E%HPjSG=ql!~Xgb7~<+0xVP;{cW|RXA!8BVDd*{?_ zLu(M!d#2HkIbawy?V~eXs4s;WKKC8VVz~hHy>9Yg61@ak1uez2vE{C=hjd?x``OTWESn}KWLKUG!x?jeHWozu`rMAzlMV22{ zh@@+vFBiN1@cgCD6ER?PjU7qc7c5b*$cld$ndtnThu30VNLaI(5o)4g^>i$W_%%<6 zcXaHPSxu3H+pmRy8ErLJVO3M^ARAUFy~a@4`^LWMug2Lk{uwXd!UTtI!B&Q1`olJ( zqvP3SWP(N32)=bj{{8;z``LC^va=f3g5G&v&_EhBv(Hr@Z!gQ|s@fgQqqMYFN^_kT ztYjZBD>JAIq=56&*#6xwk=*+tDVroPiAh|85+~JZ`B+CWLk3=P(7qM}LrhpQpdv6C z$mzz}p49sY9&vb4W0 z>hTo7UofKct!C3n=CC|hK8%f%x%J$<$Rxj$)$H8$nsjMdaB%?LtH4~}N`{m5j*XkF zWF|o#&RPsw|B$w^jl}teido>8vssuAlr_EPsCO!Ev^qbiFNk-`(MVeCd+-WrWQcc54;zv5S|cdPwCm^;s4=V`+}Yb{JB`VF+`f%-xNCYQto|3#rHU%q9S+Xaag_+M)x;$-eu{>O(EZe=49~yY4G61 zXV3&e?)%=Tu}t0&)r)gdXBMn!-aJ{wfkuigi&`n`>S!4mDZ{Zvsm^@b-#!97^o=tdflODeDewZrxfkFw zPz*Hy)3U$#oax#>MThtw{Lf0w5uSPL*m=$P>djwY#Q!eztMCC@K%1aE@t1%5*Z(OP z1fTmj?*{y#;Mb6WTxv92;Ql=d7@&jGVF$6KbDRnEn(ubx)S8!o^(jf{x0 zeeirdpVd`=I&O{)N&$i|1=jP@B^03hTNdg~i@Z;MiH`cT5sW&+1_<^ZT1zN>xO4j- z5Av7?FTo_Cs|bf({4}isjHU>i~3+?dSB064kI)7?G9{Ig7#}8K{9~<4g;cfVS z>}9@DiC!2 z)t067VcVEw3~+B7@H3(%Tlw`L&O!|?$x;3{JQHCZV+5X)d}J35`@$M&(oOYQ^STrZtg z#Oo3{GsbgyCr_jD@lvmJHao4(bdp!sS2vdowK9e{yMamV-z+TKH=s>7R_@qc>iR|g zmC!xtXtI$b1N+pycY}HKeOW3*xPw`;p+bGX30sT7JPoU`T+LjXvEL*tk*|P+_1vLZ zdr(=c-q5(_-B=Dpkdhk-tVsk)Xt8Ccfme{G`(us<9%Flkqp-AW*rPU@K8tLq%wUPDLK*Z+ z1)#rm73xcu?}qyI+hlsIC+z9(p#iv|@rH^z#Tw7D^EeLfGPU~I5W#pd$;hs^*u|>f zLf6v+xIu2>u&!#L{M0S>TQpSnNA= z7wB((LCgE3-s61ul}%dx(c5gAfkp6zu~C?YxQ=u#pDpL|y% z+G085%UlmgNO}G#sfjYt94a$GG~D;|rehREWQ3l>RS3y9&rridl%qKe4Z6?LBr-bf zD!c({l+5QaX1NPo+IOr$#z{GH&(lYXj7r29Zc&H6uI{?ks3NYBKk_UWNJP=>yp;Zx z?gsbXyeAslYwkYscbND<1gU0!|LPPLv+oJiW{U-5j$f{nVRc+bah&v6qVV2@pWTdU z8!u}!Mj2aOV5rWnr`VxS$S6m1FoMvP?Qr`n#oa$FfQ=|Mg_6;X74%=qTiYR&-E!Z( zw6PLnjnm|PWNnM?m?h>6wS4$qRcj`B2=pIk@K zF+j#ZN=t^&a#u-64!LR=hS%427v&-E8e@=(OL}5!lqJ)uCGY`|&oUKqAsD zosXH1RJ`ma?)jw5e7u-(ez??(mQKX=4e}YALej@t^I9QAJ&w4p!yqWb$9TaLvin)l z+CaM88yS>MUB%rmUA|O4?-!GPuP7ja7)e1;VLDp-VkHq6e_}YS^8glz`v_Y{LHsX9 zk_#>>iwtal*%xswt^+EDOO}5LjC=e%;TcY~Sv>i8uoE*<0_`b@uMe%(;-OB3UiyxK zmF~{x3wA9>86`Ik?)-SIteJX;Rkiq&;m0PPItz}=pmRkzi1 zBprcCjMwQsGdhV(hPHdD{rX}@(&ZSxtJKcbDq+dpeXWeQX0nh1jIx4jTxjlQB{brV zAYl?ozk;xGk3xRBTx=Sq`hI0K;E#IpPPq0yII8Opssf;s0j-L${-pD+hx3sbxL=&+ z%*;QJ);Yy9hfSOQn8sm55a7~Ju|ll*&Rz+4pI?i~s@(-i-N@qri6);)2OofTSCG#D zvnuS?bI;srciQ5+st+|LYEb>stH7!v2T4FmbFRn`xwrn6fFwk}4SrravZ*tQn?wt+ zM?HXs!g&kMz<$cS4r}igxTI4#HvpLTK=XR~OVJLN>{MskU0zAA#uDvr5^4@E17EUU z%k8b^2mPkY>l82CRE6qe4>3Q;-Ow-TY{S4JIfYi2f`UY~-HOt??1+&Y`#~@v(%g0P zRJll24Zsf(xub^+MGY!a(ehX^$+;2E& zT6A%I1pFHM(7g|WQ+-Bp<<87o#5_5_!oL@>Jh`R?*=Kz1YKx@26YFtE}OgoQ~@KO3LrWDD5^Zvf&i zqz$)^4iZXcQN88e{9|{h)u`!O-G4m{mAwt?t_krP?3FgmFGMXF(Z;hpe@RAV$XQh*mr>@^?h z9k_DGnhmr^V?;WwO-70{BYCN1EtN`P*SYhcf_Re7&|p)LPj|AU%NF0fG=ZF)wsa7n z-{gv(Mdzbg1CH3eb-2p#uPPdxilT537m`tg()4Ye91o-gLR&_uFkZ z;ING~d5tl=t9TsF2+N7MdM__x^5A@4?Sg9MDz&$KQ!Iz!I~U$?A7Zv&T7|oG>8?vt zMiGroa-cEN7N#wb_O;wNjyv9JVYA=(u9X+z{B&{Xov(PYL;&3)07zvUrmc{pnV-Xd zlW}nGuE^Q*^NWYC^tJlafS(frf#i^BpU5 z%!o+X5@{tP^4->05wan-Y$D6ExcbQ4;^CXmp3P5*C>Ob7v7BTrmL9Q0Z`JE=BX;m1 zUXXq#u8BQvJJmD2PDm8lvbmaYQ+7+sar%7uQWGuOsMnxl7d7nYA)Vn*_2^71Am*MA zaM?x51cjnZUrQnZNb{pa2HtQU%*zyy{oZ=BPsl7odU5P3c@t7hd8ga%!ljRH^S5l$ zrJVfWQ`mzr-A4WFXH3UOI&)jypRYNSpc0`}+Hqjtf$~Je{zGnZx|P!c#~zmk?V5z? 
z3vGC;4I%@_{!5fki9cGaop!F5r(?BfA2uqoQ=4o}j8)jop7*Wb0kbG`cY?;=)Tr^=~xVmCu^XFK`vJ$u0 z5$J)XPfdjQ(>vD3%|G`hAM-DGG3q~0S^cav?B}Mxj%iyYDcnHC6nF&7>IH)`A`90m zY0E@T!^+;63`Z;oZM^9P?Z1;9!MawaXG%aQ-Rudc4%(8TyoSVMvMLJo(J^Vb`PcGr zX-Eadge#}+i(YnOKgi{J>Mg_4X}C?A#BU-7;lh)T zXYayt8)ZJ+m7&V#InGX%lS*SwSlyuJ60}<;27Xsy;jDo9k@NNSt=Z;8gWS3SrUfe5 zYH<>|VEksgnG_yLjGW9WiKo@0iRw5v(;>7mmPszO+a)G2gm;d#sB<@Y{ZLd_7O&Ecv!#ITRQ!dCr%>_ z8oSN+wEEV_oLH5#a^k=^6NaE5ri25|_^-E7x6ypOvhl{Hw<%>IoOiAG|Lg4y=4vP= z^DVUAeHAHtI{1Q`LOEAee{9|*l1(KmGV419FabIh%fTCmu;FmU{-$TpS5wTsD~&TM zHt$U%5!zc!78rYwUwvklS>}_kC$rv-`i?O33{bKhsqT#4in|lmPUPDPwvw-&jD0*Y z^3S}L^ydCT4IWQPU!?w2NVk~!8$8B{510nK-OHFJJ?qoad;GEX^~!*=Qt3UzjftAC zU`z$`lFAzurv&XN#9-t0D7_~hyv{ne!%>$N#btEWpNhW+qB>?1>!?#)XA64Y4a~-5 z22`|lez**+k|k)eIqc`mZ{eTv;2ZP0*}xP|r$^cXqh?>wl^{3P!4xk!+lY>xoz~hx ziDg%*87ZCM_UooU5HR-P9ATz&T(hxIktwt|(uFhd;Ge|-rw<^3C3y%$LTB&FMrGu? zV$p-|O%A=MCF(c`^IFEH@&S3Uz^L~wLXxV5UH3KCc=K0?mgS1lUg%7H|A6VU*qo!1Wf>Q576&%l`J4l3`$eKjRc@~ zaxVimMlf$P=BU`Dip&q7B~Uh?$;3igwC7+oykwmsRY|hV`94uU+?aKCOw*RP05Nu; zLc5~CHBGJ5mO9Se*~sl4F~UYRJHbE&i{Et88?VxpTz9YRt`7G?R2y~@w;JBT`0F?0 zGK87VeVOz2!|`Da!S3^$R1h|F)*s0Hi(@nMvc6>Do19qWx;^dd8cl4=eLyaa!d>}h zR#jSHHfkRwXh@2e!p9&$Mm9xKg0s0r`AU`(_g7F;Bh6!C;S%!W_P@$8e=d)oUZ4`O zsu;u1dC9N@y*PZw@{KS1He~a347ewI|fQ_YlKk(CA8J z`2wf8_N!dyCRQ#&ZJOFm{o<0C4w)61m<#o%&tCXG=Cs2?gP)VjC-mMMs2Tz#A+9IY} zx&Yj5!U;w1R`Ul?!3fKEOdq`7u20JTesYtJTaKeNk=20OyY84n9l$UgRjR;w7$x6_ z<`i>2s*Br0B{?WQG4_LHp!Er$JH+iuW&)pze4EW+Op>p>4b5F{@S6p)VQ%m7aKaTz zK?b+*r&qbyuQfasq&vq}xp>QQsgbg;XbqN?Zz37lJ7Vr@7QFX8ckoZUYx_^TD|{T^ z?gE~3kQ$e?W@5_X@z2x4Of2sISp?c92eq1`VvRHz>XL6$ba-N_0|D;^amgn2UD(l@ z$h)qBE6ydc>HZDj_YK<38;Rak1HMAoE~=XPCI=5IT`>R7m%M0iNTv4*M{i}9*f+tRIg*$W8^!< zn~4s7AsOq~U836@XjC)HKX7_%iYOkxtVl55_GFS7-c>lonDYP-Bc8}$f2`BI#(*Fa zDF$ZYtSUYJNrp=JiCJ&MJc#K8@EDBe@yC#mQh1Bp*2FVL5bUq4+a#AH8pF1WghQvP zBSr6lmJ{!Tz@Ee79b~^jh(m9tLk_49RpXZOfl7FejxEqPjpvlLD#XaToGu5RNeyq)*OCP#3LO3(9t=ixYO)mA_E1AfNN* z5t8bY4ggrpx{W%Pu959Lmh?7+VLQrW(H51M$ed6;m>0l*9?HkyceF1jyg87Xh*Hz> zCcLQ}BpuncCg-XJIdzBcRz`Q)$7VSPtn46T1?8Z~t>$`F<>QiRws6`Yz;W;beUG-h zH{Z@U?=9<3>>Za{$8Z)ux}q{WEOw9KADf!*gOCM~DKolDa2aCR{e|e76LKmRzHc3=sh%n%D5YK1>3_PkhD(sNhy@C&I1tnw^IaSg4!?*#JJ01J(R zjUEF6a0L~sqI&RA2nh7#j9Qu5`Qt|q1tY^obG6pvH1o0Btrh>w20?;|u!-TOM{b52 zgfO|>47RxV?5K@5E5hSHcPc1bvQ}qMtgP>|JN^@2d5bA*1F zX8QaQ1S&U}&h+4(oVnNX=luUq#=v(;Q1KPHVSWD}H^qPaO>_Lc(IofwpE=#%-c0=S z27&9_?eW~J|NgW7`%}k2C44v_;m7s=z6SsPRD6y5=>5-smcjo6mQa4>GNOnU>ih8j zdK3{*`rbPoqWGVi`QQKL??17;4GQtR?pM|RuSapkug%$8>3{o9{{5VP`_w53u-N9u zOr`(WQ~&WO2l%xi5SRUDbo4*}i|+wovBwXjO#iP(*~G66*{8VwWrP0vQ)$6sS8at2 z{rh+R`_DRsUmM2D{QpN6=;CQ!u-Nvq%-{acNBMsn@qc|0{@+H#A1wbrZ$z4e?}?(G ziS5*e-?QYD>UWncevnbj^qb^WT&T2P1CG7d+LXTojfG(aoRUpwf3-+H@$u=Xt##s| zg6Up)KZ+9ElWQU~KN`9a|AP|p;>VrfRqL}K07avIt&G=m2PsHKNXeG-w5ig=?k=B`7O_nS~{ozGIJ%?O%D#ezkyP|9u!^$1t<@uA zqw}6*Ww73t8Ga5w%gSRmnpS=8FRnAzd}raIrQDIraik6Q@DE6&?GfNZIf6QNSVShs z^z5R5qL7({wgeYS_(kljs$VH8Mf$^<1<`{dQ7#3RQO0oE!>I;y?y$V{Joo((R?f4BpJPUqPeY9)!0&G!ckhf`jJ*Lt2D@Ak@VqIoCZ zohu24?VR5Atc@;5*a^2)24=Z@apY z8fsXHFUMLWcOsSAmU=vMPdC49~;2XkMj@E4PzS;xKFmOOZ*vtb3k#TnN!S5b&y2qY!MkOY}#y*U_5y-}SE$yMg; z5U^X;o45`;b!Lf2z0%h8^ykPGk0H|Fp4>Whf+10DCrkeCZ3GG%8^4Jl(#~4xM=vY; z!pxu{WSgkUTM{Z-*7S5&_+FjKWH1EurB`h6~N}o8<<65x*>)axR-G zrtMOSvwCpYW=dfKQfgi%o%OAw>}fZ3wMx*s0%8kFF@Ek9%+Qu@!QWsX(t&vH+2}_3 z*-phaX1xEn&m`UhY1lB~&e*#wlSnps%x=%-l6OFXW%m<6Y8CFwRU_*ZSg|PJ<;>SB zGAP1xQtPIBc0%%%&oUiZoDZW}8`dlrBZn-Ya76FeSscrMQB!MkSQEDPa7 z_q}x%D+6vqVuB?Mb^yY4tvA`Ybk_isY&cwx)@oA@?+|Uyg<`z@6opmzFF_C 
zA^5_V%a;Rs-6985XX(qF(aX;b?TYl!xT6gOs64{wok7%odN%RFe|SITV0W9fWRH2-o<{l9gz|0020jszr-!9DHP%ReRoC5}aNCLg2xFD{96Nq}xM zS`}24SGZD}yAw?6KAH>^d{za(8>h)HhYp0K6qs^7V#*`oa<_fedishl`EKLDAy7Aj(l4 zbEy?s>dJ=AGlM?k=;_{b^4t0&IOyA{VEw|Pc_xsKKM5{&^!i96jn^TyxYy1$7ZSG@ zs9)0UQkJLj^^0>JtlX4;3=|_QyBjIbiuiexxG;RCxJL|16CO7AxC%S%N$GD!`bt~V6fP{w~{-~Wuii$;_xJfEVhq|Dn6URV_=0H zc$ZAILrVeM*-MbrH;X=6#uN(}AF2TNY}R_c$tI>>?oF-3lqQ9jUmQqI%sl~3vFYJV zv?2V`FJCRh7Ez$?PpgCtx*A0|Pc!@OkkTymhf7C;OFcTI#tiVRst%@Sen(f$T zl?Jzm1J!zm15M$&bo^V{wV;@NGc(4p-H~~{_VSqX^{3E;V);XM5K@XM33SRXC;2y= z4-a92tJS*l?=FvzR&fIpCH+qs3-L$7X!}IW>>K{NFDrnN!|c}W@rMG zxJ>h?SIS;iJ-P2}%kG|u>!k#{%4vD00_bndsHf_)AQ=hJhU zMwLte2RBL@9^;&_nPp@u{LaiFD4t9$3n-(g;GpE?b={%llOG*S++^~rSOZSLoN4wg zRV)!0z+oXw|Gll=O`RD5xc$y4*svPQa3D8a^XBl$jwv#hY{ zMAR@SzS;lLreBO{_c9+hsg@;7ZnNxXjM#??Z zqyt_^u6%37z1nRNxvjxtfga!6qc^f#C^J-inx|;5F&Mr`LTIg9>e$=3U_x zbkM;|t-pVU7M@7YDEy3N8NkZ5oA=h_m*Q*^Up9WmY%{`+PxnLW;yelh88bgzTizMn zs0H3%fX{J(Ch{AngrT{ClTXb2#7th*E?QBR|9qzcN0PLjV-vMA-usDE5!8l|7X$Ri zfb-M!ec=4$7tP(#pU~;Q<)fBwR`lG5LB2`3-yjfA2$^|%G?wAM`P+?Z1}|J_+@ysg zjL9@FoEGM?{K36$)Y~t10(IFq&HFUum@M*Hu%)(#qQ`Kr;+GqX(T^Tbiz|3|5ImO` zl_79UN+Cbjazpe^R^lzkt4u7+7bWGzyLuD&rzO8A%57@a4<0$7clt&Ik*nqoXrZDi zq$1W89uJ3;_D^Qzt!FN6BFKP@YVyhJ|BtV?j*9Z@+Q%gnkQQ`kR6vjz5Rj4(QIQ5g zLWWWSX@(qHq@)F;q+38jsiB3Tq*GF07#gXe`*+6A^FGh>uJwDr|HxV{W$wApKIiOx z?Q37xeN37A9?E2?mmgh_d-;k5^aAN1;x_nL>|kog$wu7{9hL~PRva80Ej?$-sOC6H zFsu)Tf8GJR__to+<|OGT6h6Q8;Nz2(C3F-UdJ9#!ra$cUK~zGR33LE1()KZZf#ho= z+#gqhJtErW)$G@nvRn=JZkjZ&*um{X{a{XGl*T@xl0QZ+aWeVf&vqBV#v;0O3}KTG zf;P?c+rRZj$ur?y=hnKpzud4r>sKhdpcv~s7OP{BBkk@Li|E~MDdOywxLsd~;Wd)l z?35{PpuqlcNxrUZ?vd8HC22X#h5R$9JD1!8W}p&Z1LeAF^>3A<;2JVm=}# zqk@A&(|ATT z>q!bwyCVK%o#`RbH??FjFc_N}5RhPhM3t8+FVV^Ys_UgUl4Gu7BR;V&%_4 zVAE$yy4*^~*k9$bfMjJz-=Amwh>;wt(Z^6qG6q7MiSmI1QV7(ah=@{E(s55YP2@Vz zvghqt6S`_%mD`EQXgnh?=Jt##h=1i$yS7qN>=6Y&iZ4)2Y_4`lxjQqvW4(=5;n06; zJbf}`D3)h00v!x&TfQ5^$r;usD`%m#e(Qr=t_OqIE6L_@R{34&-0eSaAfz#P~Yl|k4dyA_ipPxuz0poqT&dpG_ zt%&nW#Iy1)u`ZpyT5M!tiC=dqv-#zr@#!G?R24R^VlAtI?=OJhfTw-z+Lv~kN`%8g zz=YqLnE&8{yH8iFRdF2^v>0)NTDSeusQ2gZt|K&`B8o-M_=^jw1#OgZ5&~kIb z<;`Rye1$AG%VVgNNhBkvE694qtw6`35pa5MlL&;jyuJBU4o(|kIDzv#KHI1hisEp7 zBxc1U<^eevOUw_uzihuh1zbU|tsq%c-I$9TSvKkq3=QT%GuC))3;iU-sCeJ{2}ej^ z@^Qck1QMK$Z+$&&JPPJMp7UfsGMB;$&@Js%(PUkvSiykbpyN9;Uh zpl)rdI_3^w)a484;jF6asvR>x!&ah0SWp6nxZj$hd(SP#D_^-Xq#+E zM@{maP}q9xVN9HgBwTGz{%PbdC;y%hDcK@RH=s7K=a?KdSnxuF;HvaI-8JXjFjj~Z zTC?%XZgCOI{C*Xv41I^xWK`J3>zY)$ni7hk5;A}uREY@2DHe-kU(LE!bw6~g4Tmr~w!$N;{!;dpm0mat9UJp$Mbq~~8O@K6RJsKpl5{d{- z(@Yf~f*5fFmMRgK<3oA_UK@G)@`-c&M*Pe*cTJrN(D6cNJqVwB@1%LZ zWF~eIOj7$4DNg!spSgKF5h+S9;iz;yllh<*P{69ZSx(_E`k@5uyrHPZo8ieGI@Kb`)=5GUfj71+j9#Us8^{{UZz7ipZT2Two_aB=_cgjR0tE5Wa{TAd%2kiA?sSfg` zx&};XfEJ_sL$$p~=!jx;MM*Upd4N?wm7 z()~TQblndA;HqRP_3xV%4o(Aet)GBF?dE-C%C(LAo((TsKlhHu$-5!3DTDA5@N1VL zP=kbs{|Xx+R%_&c1x?bwgT`P*cn9%9#V~!H6#k09+x{MNJmvWC&YiQIasI%i4@fl+ zST%!-y~QSw2ugptw`y^hq_;Fnm6)>L!soa1lfh{8_^HH8_Gr0Fd6I(Ii5o(Q4l2Y( z_0prPN!;o`?@9fc?evILjrvf{`Qa^=^xfk%yB{i+E2n;|3$@N@Uik^5lZ4k1zO4bA zvKQ+aL9-U7hZK&*S0_y0`7gi^Ay4yc=yZy0c0@6)JwXAAPr+Eh;U`x9UhxyA1VPj9 zcLsz%Jq2gU@#LY!Y{JUrH0&HUlFhvQ$&-Q?l;I=3BFN}V1|r)`855haI)?cx4^u*G zM3?Og@unPRM?c1L(Z;`imR(+~^S{mzWYs>_w2Q{0jXD(>X4l$z-TnTh0N}C5#t&Uj z>2kGPTfAB#z$V1Eb3c55$+|29TrE({SFYVU(hDjjTw>GoPhw-&ix3%_-3JtSQ8nvt zZ=~g9f+%6tI5R>&&c8{Gb$QlJ8Jo>&hn^@1G_}%M0n(R~Cb9z`p)q^8JGoy)7q4BG zKr}I@C=81Vw2gA_qu>2OFp;*}UMUHk(TufX+-N2$2Db|bUF3hQb(yi3!Nz|d5uyKH zZ5wdIcwv9K*{|NsTb<2I^}fpDzFg(l%mw|s>W@ACb>EDY!?UDyIjvbgJ3Z-FqHk;B zZ`-Wfq42C*t!VVQP1*z5tAur^sZSplfmhwoU#jnba$7tCG? 
zMl?L~ibA3Cw>S!5)ei3C^ek6CoWy33CiN%;)NsFz$Ga9sT~%kl@OjI=x~+%krAtC5 z@&of7m*iz-Ig3y0O_y22lnCS}6RlzA@cFRHOof2FYG-wkZDGpnXU^&jbn3;0!BDKtPImw$?Ch1C zye?C;#pt=nRSARnu)@CP)j!wT=ttpgMbdja^i`jATYg7aAK#0S#!e0@Y;PaOH%^g` z8oAa}xSym{0WIi=MT~zM$U9b#2)A>l*ud^5{2?qMVmUR&J-YF`rU^$$#N@i)Wg`%@ zm5s6sG~fA%(t1kAldi?C-zv|nP#|Y%)}2tiPs=+S$*VOuzJVGJSp(_xb8I;up|+GFtrawdUG#r69y4Yce`jg6{oQ>?xP zAMWOC6_zyMC)*@+YRE@-_A43HW(olDRP$tJKMENpVXI$c_PKi>qS%Z#lB?g9vpDt3 z+JBfRo1EGa4|CDEY-5YwKEy<#klPs4cNKs5Vf?c{%iB>_?U_!y$RlH@L2ZK^QN3bQ z_DRHpjdxIB<>&X$mgeni+Z43lMYGEH1{Qc30XLcALSN2I%+KXqMyHFje(e=4r}36D zK$>|MNaIAqsp`KXWcIf4NN#VY{$$2}o8vMMPxsO?Sa(0SvFX}nlCNMpiS#U#pb^rsa-j2KA=c)rRqlE zVu2=Qs*scwc^sg`__N%~hw*Ys%RxHx?XOUn*Nr$GT9cJF=VW-H+F2$;RU2-be-khq z?^jq6WWMukd%1H#PGnDVz;L4E6(ZspK4_9%^&(Y+t5r!}GO}(wTGTgKEzj3Pe=X}B zJTHa(Zfq>2i;!4-dnWm1E1$-J{=5F@_~ZDW?~2&;PSu0LoY20(l#%Xqi@hoFx(>>E%1I*RcT}Kb z2l36_g`w|rorD>^!zcV3)iVAJl+DJX8jV>h52PJz^q&eCCVRdunm=r6uKz1Tvs_5l zldE5Ed7sJq&;*naf3JN|LinqxNBZaN;i6;f5hV400KT#XSCXJo*dF#lCRT z71?ev%|%Yp8YG_u5c)Y)%4@7TLl|8u z*6r@{(f5`b%)a^TzNb1GX?^lA3F~>N>U0dVIhxHf65EYZuHCQF=wG>Q4)bbjYPq9Q z17|j$S_P$0TM@j9EyyaVG~6`lZ|j%z}}j4PhIBk8NFtOXqb;y8vc{B z`nl@9)h~ZWJEZIh92lyXEwLk577Qc!OT6hR9{f>H5$%n8^jfewMopTYg*lttuXKU+TwmNjD z4`iEFfW0B}2#d7LSvk?ma_d-PN_Q|+eL3F1J)fe@Mz}44k&@N>Frz{KPe&p%3Cr%7 zY|ry-uPhZm`s$0q6sOx#b2|WNjQ+3K}(QjNg0gKD+MKxXFZ26 zSRZ{ibr&JiHXq6j7dWQAUs(Rg&2-j>Dne?1>@i^oTvbFtXYQDi+Mg^U5qE8cV9Xd zO+SQLm4+<@K@kaT@sv!}ZP+XZwXwN>&XWxn|AZO|aJ6)Tt7Wt%pq2Bp&9Q~zW+o?B zxSR8e-96??wU+@(N1oV~y#9hMb?2)}&J}n2f&e<~?lX(>04$=kyCHo<8X~W4qij;&v>9-j^-#**BU7Y##Ck?=HwPo73=Q%eY1>cu! zthD9R%E#7*@z&|{r5I-HFN8ItAqnz%lAi^njZ=}MOTI*LDon3vryP9a1k{gC_SgNe z3tema$wwL)kD8*VJ?~47e$kL)<~@|uD%MIYNDvdFrkrIv0B`=`klE#~^}D=?9IwFl z=tZn&8CwRT!_Q&isz7UL>39h3=w65r6Fiqq!ZA(AQASv@lpDF}8VLmX^Ny?FP4NPX zjx$~adkt1jc7`Vhtm*ro4L=iS%7(6yI^8Car_?EC4pn#@nM4WN4ncTlC!C$_Mfwzy zTMwnyq^Q400^kcImb{!XGtRReIGD>ma%si##k+i(FRI8<9pEvuYg4)1LN6_pmXO%L z^fvqPbdpnJ<@x>PkwVcZqxsHz>^FY4-1_kZpsMX7wbJe%hi0JKC!H*gPEFjm{i#lL z|JcDRu8K<%yQ-r@u3Qx=;TYKJMgz2>H?%Z3FI2`BchIbhdhL0_jwQ%ElHl695=o^_ zf#%9~z3!h2vHCDu78N(l7j@YX&rA~lQ3#lCFeTV@Kh85>tk5o+ zc-LiSIa-|ZX1V%B50g=?OHqU(Gs&EDsh~DmkZmYu@=9x%`5La8H<<%w9`60dzCq-f zvKTP68TiX)g57i+xOgI~Jr1%%FGJY(D_TnXhF6oEc$~RX`bJthv~fWG;8Lfn zJ9X-P4-rXh2U|_Z8v)%?V>~iO!*>|>y<1mgB!OTVf3w%>J5?h{Uh39>xvpBVp##_u zMnT)nX35v%dhdKqdpROsNQvr|{{i1Q1r&VDo#8x{k!t!ctN@4S9@VAXK-?xHfJlnt z0FRdOh-Uk5B$&>MNN^37=d0z4fdE}xDB$dFV5i|TIpfQ{r*mia&z~#gjJe$*`YiFA zcS+!$vwv|hc6s@Obvv(Rr&yr^R{1oIm+?HVuy*FEYc^BI)e-vt6R9@jeTUHS&0*Pg zG{}KGKb{Y|7`-CztNzguNY421?3jGE{o<5(($#vji%4_8dBVbZ#9MFAxG%NdGH@$X zYXB?FRK+BY2@@Lr(=WJ%CSt|wr3f43kcjR{O4mO~a{hJbOj`9DfewIV@+xA#KTH?y zxK9zSkt=F9PBvLtZ|d7=rx8*sb*g=>FP`rl9dCVlGoaXQ$4tO_Szvv0pp4UctW@>{ zwdk5q!np{Hsn3ri`H=`I+0}=@7JxBVg+!C+VyrbcW z=cBiGXed}8_nEidU!?;nB!k9J{z77ZXLdb~XICMFgn0afW?#+_pmY3nhil`^sh*gx zHCQQhx)o)y{qdy}qV3^iVF|Pq)>l*zn%cqVO?>|-_+5F{>3otpA2Bh$E%B!~cp)SA zHGL{BLqOPs)g3Bs++(}RtkC^V3I6^eA@S^uD~R-#%+^rjN9bSo6V?q}r!S{Ga2@1$ zT(6=^_(thQ0P&|A^-7bRCKV4{R(0phs+VdWf{${hAn`hS2R_PkHoBh+d{k@jLks$< z8=QgBRX1ltKtJ7pI_6M3{GAiKcn`}hRVv~qKc-X(*k-4FLDO8JrS;5JzrvOPL7b1A zl1mp}Hjqp=|JAL>aJoL}x-3NJ5J|(4bbA2&(fGU;LG{svma_MG$jzI?^HBU8lc~?O z%cQ0`*Fl#xUb{EG-+ky7F{7xhKWHxJ`2Tudz(M?pmrzw|K?@lE(K5kXs1=Alo`Ye9 zt?7$;BH)9-t9Ko0QbAp>-%R&8YlK=VDl3foJN7D`g}hrU^p-Xi6=4V-9{4f@U!rqv ze#8SGV}9uw3n~8lci;(Hf*$Uz2FU@Y1{$c6y!%~y!EDs@JR0|GAna}eVm_NJNQ3qm~Ehu{mXrhfti9$Ooi==a_&P7$!plzlQ+MH1|%PQTaUOffZJdKX6=44<^U? zw%wLeh=3Im@ALKhC0~6lhVmdlxk6wrBWPFFoHSMMl73|!3&ZuUwc&5Gvt_;qON&=phe0kg5>DMjA>Lh&iUW!^ez)y`BZlq4?_63*q}c|v^b2jc^j$_qw^c7cF~-}Fm##vcqyl{!kH{b^>a&C^C! 
z@+6PBBn!?Rdq#>p3hr36M^rh63j?_*qFt>E*(#e!eS1{6D50O`G^^WuRH^5<;F;&* zS3LoMz4Rjwro2R*c!ech{|r+*P{*h@zosZ}6*Bv-=m`g(_}o3eF1m}k|C=VKix+0z z2)YIld6y$*QuMOIWF;DMAOG;4^Qr%KRAy8c_S%b+P}JB9EI8%<*}2Z7PK_JsXi zX_A=a%XqSYD2fugg?8iP3tYlPJfKK5LaJxocV5*>fQdbrq-p4gPIDSm=&x}ueC&-w zrcx_-1D6Qd^|r?-oW4JTkwAqDAmPZv__gC4!cfI;FN(y#Ne<=LsAzokTGgaYV=t*y z#k^tuVxp!(J*RV1FxsQ)*uMVNLTA`EJ3b{|log9@Yl>nU4Vt)iZf@ejv{1$&M~Eq9 zj^i$P&Tu>nXU0k?0c!ly5l6&)@*tvu|$%a(-(b(g|a zXdZ%^sv2tOY2XmQ{aEZJ6p4(8&jgm&nyf5< z-yq@{#TSCD-;P;YMJCinqgvX8e&Ds;c&^HY$5L+oF zO$nTEO||e(kJ4f>C7Y(xBMHP#9AAHtew8t8btH-J91CTe7Wv(_YrfIasrE%im2G~- zPA|WUet&y(%%U@OtZW~w+``TV&JV7~=`D<@YT^sDNT7a^u3hw5y~y(ZSgk8Rkh_2a zVVC=}8F1NAgL+yf_pywv#iYsEoOr{l(t*HMC|^pmtGSE6#J9)tLd#(oFCFdHpMH|z z2n0Ql8m|w5P5>GJl7m6qUZC+yE431`JMu+C>MogGs)y~Zx}Dw!Pnvt)p=gVkA2_`q zT1&|_8ynURnq2>7k*kuHyLwbaU-6;QtmkuogO`Wc-X|Gu&($9dXD73veKLOIj*HE0 z;fyrfNuqXuW+L2w5X9t340HI&F#$UJo2bgFN{+dVrR{d}=6ECTDB3{LVo$DUJfU}o zuU;eCBn&||@I>(?pk~zpBZ?=@_ik#oPkoak65BY$I88t<&NHV8rn)oQ<^~JR)x`d6p=S5^?0yWIDrIzqi*g%^__9Sx;!E>?TG1-|m9IQ{Y$Bl} zk^dE0+yo(qz&sC^ZkuZ?$3G44efO(q;>S@YAhXL@otz#%)6hL?SSLQH zwR1pUR?q9zz|k)AyImd19JH`rS_dp!#FnA%O>&6VJV*CtDKK(IcY7hneRlb+PYCj2tnOxVr9S3)e+mmB_hAW!cEDLzWMy*sK~H>g6!A1PXS~nCCK`QY$keZV)H)%Wo~nPatJPK`xVixCuE%!r^dCe#*kwyD^5a(% zv7PlIK(|8T*`F+I-MUK<$v77pj&>XCH+komla675zWsGFY3GXKh`&Mi=dKUyxdsbV zCnmMj$zKqO2AN8OKFssafS0V;jFO z0((WL)CnQ1Zf?gwoI29y^mWv2c$_)rX}A-DHi-1NGgPhCd4vWgS-nE_CT;&=x8sUv z^+HZ`{OGU7D@&=4w$yGv^Y-P%=I*Pa$_hQrL62P#V$A{;HMf!sfYJ|LSES9dS7cPy20p^J-(N8>2TM#mPtIY@=|>Uy+?k&d(%Q zV$%&m^ksNs8w*A5j9gY{RK|Tuf zdeetnO)m^Tuv4VMU|)VeA0IoP_PMy2UCxReYOp^C4486 z63PU6dEN4j=@~h|N{m5e*i}qmvdlcv(8SlHF!KZ)$>`=gD&;bVa)IVk9Tqc(qL3?ic#|ml_b30#a3Uj8qk+Sfiv}5H^q&D@z21XvyP&1_q&o%vBceI z*~fT&(kEeq3!DXdlP9}mHh(Vq(cMgvx>tS!$brl})j4i|h*HOp1;cdq$^~asY~GWsp44rs#F0ueA{t_EY5@nSV53=%~b~7OsPD8 z!H|2HT-5PA3ekMSHRK@jzSZu@VEhBuN5IGDuGnr!%*+16pk7I>;p>pJ^T^2(e{C+g zbjlQXmoqnkRqlaU+Wtp8>B9EPV=^Id(kY`_Tg{UL7LFzKVMvEI8~J27x#}=Cyyf{Ox#W_sf&&Q=aH3(@XJ`4cJHX<8wb-119+54gl8K3;jw|I$(lJpYy@TT+SLe))*Z+GcAb%>xz6Hb$4$V% z@Di@iF|~MGe?w%16R-exI7fKr|HT44cs8Qe>w*3Gn6|i1z5d|59MXF?xhNzt^PYR> z{7({0nZ+|@+Ep|>7Ghhwuu1|=opNee_tiC-v~%cUb35AfxhRy5s0sw`2`?exU60Zb zbHSgmkDM0VPuQ5Y8G_J>JZbPX5*i3ZCcf90o?Rxf&2P#(Y?9@@)5NqR9Q`XY396e0 zV^$+v>2{Lwn$QB;Vr9Fv!}I}#?$2}FmhF7}3a@ge4{N$ZyT|7qSs0wi*fu^|fR_|I zwegLbT+vQCKz^AZfavIZ-RPdt?nNT)+x286st+oW_6uQ?5r>#m6-!f()v`y=dk~Ai zB9+W5k9uPv3J`vLj$d1n^6O1*$18aj!bw$cGxpuirmpHe=PEXedp2NlODCUy=D`TW zen_K1jG$;c@rk*?a~LDaF$g&8d5~5e{l%j+g>wCDt0x$#A3%hale(H!vIrQM`nP81 zU_?|fX=MK=D0G`+DD?#zGl*or+Pi)rl9V4tzYNkt3ZSJs=zWSF?hERD+;q_@p+pLu7kbWonFFVCbxTQQn(W6tC>uaEfBu}PPGQipFk^7t#)CV1Js@4%7ha%yVav$5cHZ{@B`K*JjDv+<5&DmF)<0a`A)_K}W zbDf$_Lc{bA0FBolV2yVwby^%Lw$guVvGxe0@ImPyDP0H>jN=KL@)`!V(ygUE>|n#7 zYcqrS*!E>YF84Us-L|mr{ff;(m|tk07|%1#gVa!^(mE}NHVb)a3=4PP2=w|_wJqtd z-Y^MA-_54upI)}N)d3pkH^RH5&8KfU?7BX2`E`4E0EbeX**re(@>8n#I{7F4x!3yB zIpx<11@wK3@s7f_-WO+->Wl9LYi!HE9=)w-CF*@Ii-W-2DK&RxvdBh|vGV;l!~3k0 z*Dk&9B4T`LJx66USpP6+0kx8#(fiiUk5u)ZzQxlS1*(r@|K((yms82&566uOqHjkU*``QTppBnQ;;m8|)Vt@ct>vmP;+coUU^+s@g}A=j2@z$>%H59b zs;}t)$qz0tX#NQdg6*cq-g+J=NcG?(g=!!!nwPt212Jc@Jle}|cN+o= zMeOZfI4#BnuzMhbyh;o&Px`GqYm4NAern0NKnYKsf9S**8mweH%tc!b{jfY?2@-ag zq#o%zP!xPIF=OW^Rx>xpZJz8hFSL5YhvI1e}&ad%5wy?8voelcZ) zpx%c-^mQJc<(yG7v7>78WLkrQMCV0A4P^N)sDm?Z#}tbME*+`KkaMPaIy|p&67>rr z8{lkx%`_3y{j2|4_o(xwrUkiD*oM?LM?}PUHg){EsMWOOH~%ruMU;#XD>xBx_ku96 zzWuw{L8O(98xq_ia1NB?*4Aa!Gf$B1(#8aVm6rU^N}IOGQ8wY&V==e;q7G!`iK5RK 
[GIT binary patch data (base85-encoded binary image contents) omitted]

Date: Tue, 8 Oct 2024 15:32:06 -0700
Subject: [PATCH 32/69] Update cli_reference.md

---
 docs/cli_reference.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/docs/cli_reference.md b/docs/cli_reference.md
index 0b5e73fb9..f0f67192f 100644
--- a/docs/cli_reference.md
+++ b/docs/cli_reference.md
@@ -215,9 +215,8 @@ You can even run `llama model prompt-format` see all of the templates and their
 ```
 llama model prompt-format -m Llama3.2-3B-Instruct
 ```

-image -

+![alt text](resources/prompt-format.png) + You will be shown a Markdown formatted description of the model interface and how prompts / messages are formatted for various scenarios. From f3923e3f0be79295af73fe16245a52b08515f148 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sat, 5 Oct 2024 08:41:36 -0700 Subject: [PATCH 33/69] Redo the { models, shields, memory_banks } typeset --- llama_stack/apis/memory/memory.py | 78 ------ llama_stack/apis/memory_banks/memory_banks.py | 66 ++++- llama_stack/apis/models/models.py | 27 +- llama_stack/apis/shields/shields.py | 36 ++- llama_stack/cli/stack/configure.py | 8 +- llama_stack/cli/tests/test_stack_build.py | 209 ++++++++------- llama_stack/distribution/configure.py | 103 +++++++- llama_stack/distribution/datatypes.py | 74 +++--- llama_stack/distribution/inspect.py | 10 +- llama_stack/distribution/resolver.py | 239 ++++++++++-------- llama_stack/distribution/routers/__init__.py | 21 +- .../distribution/routers/routing_tables.py | 155 +++++------- llama_stack/distribution/server/server.py | 8 +- llama_stack/providers/datatypes.py | 4 + .../impls/meta_reference/safety/config.py | 4 +- 15 files changed, 588 insertions(+), 454 deletions(-) diff --git a/llama_stack/apis/memory/memory.py b/llama_stack/apis/memory/memory.py index 261dd93ee..8ac4a08a6 100644 --- a/llama_stack/apis/memory/memory.py +++ b/llama_stack/apis/memory/memory.py @@ -13,7 +13,6 @@ from typing import List, Optional, Protocol from llama_models.schema_utils import json_schema_type, webmethod from pydantic import BaseModel, Field -from typing_extensions import Annotated from llama_models.llama3.api.datatypes import * # noqa: F403 @@ -26,44 +25,6 @@ class MemoryBankDocument(BaseModel): metadata: Dict[str, Any] = Field(default_factory=dict) -@json_schema_type -class MemoryBankType(Enum): - vector = "vector" - keyvalue = "keyvalue" - keyword = "keyword" - graph = "graph" - - -class VectorMemoryBankConfig(BaseModel): - type: Literal[MemoryBankType.vector.value] = MemoryBankType.vector.value - embedding_model: str - chunk_size_in_tokens: int - overlap_size_in_tokens: Optional[int] = None - - -class KeyValueMemoryBankConfig(BaseModel): - type: Literal[MemoryBankType.keyvalue.value] = MemoryBankType.keyvalue.value - - -class KeywordMemoryBankConfig(BaseModel): - type: Literal[MemoryBankType.keyword.value] = MemoryBankType.keyword.value - - -class GraphMemoryBankConfig(BaseModel): - type: Literal[MemoryBankType.graph.value] = MemoryBankType.graph.value - - -MemoryBankConfig = Annotated[ - Union[ - VectorMemoryBankConfig, - KeyValueMemoryBankConfig, - KeywordMemoryBankConfig, - GraphMemoryBankConfig, - ], - Field(discriminator="type"), -] - - class Chunk(BaseModel): content: InterleavedTextMedia token_count: int @@ -76,46 +37,7 @@ class QueryDocumentsResponse(BaseModel): scores: List[float] -@json_schema_type -class QueryAPI(Protocol): - @webmethod(route="/query_documents") - def query_documents( - self, - query: InterleavedTextMedia, - params: Optional[Dict[str, Any]] = None, - ) -> QueryDocumentsResponse: ... - - -@json_schema_type -class MemoryBank(BaseModel): - bank_id: str - name: str - config: MemoryBankConfig - # if there's a pre-existing (reachable-from-distribution) store which supports QueryAPI - url: Optional[URL] = None - - class Memory(Protocol): - @webmethod(route="/memory/create") - async def create_memory_bank( - self, - name: str, - config: MemoryBankConfig, - url: Optional[URL] = None, - ) -> MemoryBank: ... 
- - @webmethod(route="/memory/list", method="GET") - async def list_memory_banks(self) -> List[MemoryBank]: ... - - @webmethod(route="/memory/get", method="GET") - async def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]: ... - - @webmethod(route="/memory/drop", method="DELETE") - async def drop_memory_bank( - self, - bank_id: str, - ) -> str: ... - # this will just block now until documents are inserted, but it should # probably return a Job instance which can be polled for completion @webmethod(route="/memory/insert") diff --git a/llama_stack/apis/memory_banks/memory_banks.py b/llama_stack/apis/memory_banks/memory_banks.py index 53ca83e84..d54c3868d 100644 --- a/llama_stack/apis/memory_banks/memory_banks.py +++ b/llama_stack/apis/memory_banks/memory_banks.py @@ -4,29 +4,67 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import List, Optional, Protocol +from enum import Enum +from typing import List, Literal, Optional, Protocol, Union from llama_models.schema_utils import json_schema_type, webmethod from pydantic import BaseModel, Field - -from llama_stack.apis.memory import MemoryBankType - -from llama_stack.distribution.datatypes import GenericProviderConfig +from typing_extensions import Annotated @json_schema_type -class MemoryBankSpec(BaseModel): - bank_type: MemoryBankType - provider_config: GenericProviderConfig = Field( - description="Provider config for the model, including provider_type, and corresponding config. ", - ) +class MemoryBankType(Enum): + vector = "vector" + keyvalue = "keyvalue" + keyword = "keyword" + graph = "graph" + + +class CommonDef(BaseModel): + identifier: str + provider_id: str + + +@json_schema_type +class VectorMemoryBankDef(CommonDef): + type: Literal[MemoryBankType.vector.value] = MemoryBankType.vector.value + embedding_model: str + chunk_size_in_tokens: int + overlap_size_in_tokens: Optional[int] = None + + +@json_schema_type +class KeyValueMemoryBankDef(CommonDef): + type: Literal[MemoryBankType.keyvalue.value] = MemoryBankType.keyvalue.value + + +@json_schema_type +class KeywordMemoryBankDef(CommonDef): + type: Literal[MemoryBankType.keyword.value] = MemoryBankType.keyword.value + + +@json_schema_type +class GraphMemoryBankDef(CommonDef): + type: Literal[MemoryBankType.graph.value] = MemoryBankType.graph.value + + +MemoryBankDef = Annotated[ + Union[ + VectorMemoryBankDef, + KeyValueMemoryBankDef, + KeywordMemoryBankDef, + GraphMemoryBankDef, + ], + Field(discriminator="type"), +] class MemoryBanks(Protocol): @webmethod(route="/memory_banks/list", method="GET") - async def list_available_memory_banks(self) -> List[MemoryBankSpec]: ... + async def list_memory_banks(self) -> List[MemoryBankDef]: ... @webmethod(route="/memory_banks/get", method="GET") - async def get_serving_memory_bank( - self, bank_type: MemoryBankType - ) -> Optional[MemoryBankSpec]: ... + async def get_memory_bank(self, identifier: str) -> Optional[MemoryBankDef]: ... + + @webmethod(route="/memory_banks/register", method="POST") + async def register_memory_bank(self, memory_bank: MemoryBankDef) -> None: ... 
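The `memory_banks` API above replaces the old spec-plus-provider-config pairing with self-describing `*Def` objects that can be listed, fetched, and registered. The snippet below is an editorial sketch, not part of the patch: it assumes the import path introduced by this diff, and the identifier and provider id are made-up values.

```python
# Illustrative only: assumes llama_stack/apis/memory_banks/memory_banks.py as added above.
from llama_stack.apis.memory_banks.memory_banks import (
    MemoryBankType,
    VectorMemoryBankDef,
)

bank = VectorMemoryBankDef(
    identifier="my-documents",        # hypothetical bank identifier
    provider_id="meta-reference-00",  # hypothetical provider instance id
    embedding_model="all-MiniLM-L6-v2",
    chunk_size_in_tokens=512,
)

# `type` is a Literal with a default, so callers never pass it explicitly.
assert bank.type == MemoryBankType.vector.value
```

The same registry pattern is applied to `ModelDef` and `ShieldDef` in the model and shield diffs that follow.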
diff --git a/llama_stack/apis/models/models.py b/llama_stack/apis/models/models.py index 2952a8dee..21dd17ca2 100644 --- a/llama_stack/apis/models/models.py +++ b/llama_stack/apis/models/models.py @@ -6,27 +6,32 @@ from typing import List, Optional, Protocol -from llama_models.llama3.api.datatypes import Model - from llama_models.schema_utils import json_schema_type, webmethod from pydantic import BaseModel, Field -from llama_stack.distribution.datatypes import GenericProviderConfig - @json_schema_type -class ModelServingSpec(BaseModel): - llama_model: Model = Field( - description="All metadatas associated with llama model (defined in llama_models.models.sku_list).", +class ModelDef(BaseModel): + identifier: str = Field( + description="A unique identifier for the model type", ) - provider_config: GenericProviderConfig = Field( - description="Provider config for the model, including provider_type, and corresponding config. ", + llama_model: str = Field( + description="Pointer to the core Llama family model", ) + provider_id: str = Field( + description="The provider instance which serves this model" + ) + # For now, we are only supporting core llama models but as soon as finetuned + # and other custom models (for example various quantizations) are allowed, there + # will be more metadata fields here class Models(Protocol): @webmethod(route="/models/list", method="GET") - async def list_models(self) -> List[ModelServingSpec]: ... + async def list_models(self) -> List[ModelDef]: ... @webmethod(route="/models/get", method="GET") - async def get_model(self, core_model_id: str) -> Optional[ModelServingSpec]: ... + async def get_model(self, identifier: str) -> Optional[ModelDef]: ... + + @webmethod(route="/models/register", method="POST") + async def register_model(self, model: ModelDef) -> None: ... diff --git a/llama_stack/apis/shields/shields.py b/llama_stack/apis/shields/shields.py index 2b8242263..db507a383 100644 --- a/llama_stack/apis/shields/shields.py +++ b/llama_stack/apis/shields/shields.py @@ -4,25 +4,43 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import List, Optional, Protocol +from enum import Enum +from typing import Any, Dict, List, Optional, Protocol from llama_models.schema_utils import json_schema_type, webmethod from pydantic import BaseModel, Field -from llama_stack.distribution.datatypes import GenericProviderConfig - @json_schema_type -class ShieldSpec(BaseModel): - shield_type: str - provider_config: GenericProviderConfig = Field( - description="Provider config for the model, including provider_type, and corresponding config. ", +class ShieldType(Enum): + generic_content_shield = "generic_content_shield" + llama_guard = "llama_guard" + code_scanner = "code_scanner" + prompt_guard = "prompt_guard" + + +class ShieldDef(BaseModel): + identifier: str = Field( + description="A unique identifier for the shield type", + ) + provider_id: str = Field( + description="The provider instance which serves this shield" + ) + type: str = Field( + description="The type of shield this is; the value is one of the ShieldType enum" + ) + params: Dict[str, Any] = Field( + default_factory=dict, + description="Any additional parameters needed for this shield", ) class Shields(Protocol): @webmethod(route="/shields/list", method="GET") - async def list_shields(self) -> List[ShieldSpec]: ... + async def list_shields(self) -> List[ShieldDef]: ... 
@webmethod(route="/shields/get", method="GET") - async def get_shield(self, shield_type: str) -> Optional[ShieldSpec]: ... + async def get_shield(self, shield_type: str) -> Optional[ShieldDef]: ... + + @webmethod(route="/shields/register", method="POST") + async def register_shield(self, shield: ShieldDef) -> None: ... diff --git a/llama_stack/cli/stack/configure.py b/llama_stack/cli/stack/configure.py index b8940ea49..e1b0aa39f 100644 --- a/llama_stack/cli/stack/configure.py +++ b/llama_stack/cli/stack/configure.py @@ -129,7 +129,10 @@ class StackConfigure(Subcommand): import yaml from termcolor import cprint - from llama_stack.distribution.configure import configure_api_providers + from llama_stack.distribution.configure import ( + configure_api_providers, + parse_and_maybe_upgrade_config, + ) from llama_stack.distribution.utils.serialize import EnumEncoder builds_dir = BUILDS_BASE_DIR / build_config.image_type @@ -145,7 +148,8 @@ class StackConfigure(Subcommand): "yellow", attrs=["bold"], ) - config = StackRunConfig(**yaml.safe_load(run_config_file.read_text())) + config_dict = yaml.safe_load(config_file.read_text()) + config = parse_and_maybe_upgrade_config(config_dict) else: config = StackRunConfig( built_at=datetime.now(), diff --git a/llama_stack/cli/tests/test_stack_build.py b/llama_stack/cli/tests/test_stack_build.py index 8b427a959..b04e80317 100644 --- a/llama_stack/cli/tests/test_stack_build.py +++ b/llama_stack/cli/tests/test_stack_build.py @@ -1,105 +1,142 @@ -from argparse import Namespace -from unittest.mock import MagicMock, patch - import pytest -from llama_stack.distribution.datatypes import BuildConfig -from llama_stack.cli.stack.build import StackBuild - - -# temporary while we make the tests work -pytest.skip(allow_module_level=True) +import yaml +from datetime import datetime +from llama_stack.distribution.configure import ( + parse_and_maybe_upgrade_config, + LLAMA_STACK_RUN_CONFIG_VERSION, +) @pytest.fixture -def stack_build(): - parser = MagicMock() - subparsers = MagicMock() - return StackBuild(subparsers) - - -def test_stack_build_initialization(stack_build): - assert stack_build.parser is not None - assert stack_build.parser.set_defaults.called_once_with( - func=stack_build._run_stack_build_command +def up_to_date_config(): + return yaml.safe_load( + """ + version: {version} + image_name: foo + apis_to_serve: [] + built_at: {built_at} + models: + - identifier: model1 + provider_id: provider1 + llama_model: Llama3.1-8B-Instruct + shields: + - identifier: shield1 + type: llama_guard + provider_id: provider1 + memory_banks: + - identifier: memory1 + type: vector + provider_id: provider1 + embedding_model: all-MiniLM-L6-v2 + chunk_size_in_tokens: 512 + providers: + inference: + - provider_id: provider1 + provider_type: meta-reference + config: {{}} + safety: + - provider_id: provider1 + provider_type: meta-reference + config: + llama_guard_shield: + model: Llama-Guard-3-1B + excluded_categories: [] + disable_input_check: false + disable_output_check: false + enable_prompt_guard: false + memory: + - provider_id: provider1 + provider_type: meta-reference + config: {{}} + """.format( + version=LLAMA_STACK_RUN_CONFIG_VERSION, built_at=datetime.now().isoformat() + ) ) -@patch("llama_stack.distribution.build.build_image") -def test_run_stack_build_command_with_config( - mock_build_image, mock_build_config, stack_build -): - args = Namespace( - config="test_config.yaml", - template=None, - list_templates=False, - name=None, - image_type="conda", +@pytest.fixture +def 
old_config(): + return yaml.safe_load( + """ + image_name: foo + built_at: {built_at} + apis_to_serve: [] + routing_table: + inference: + - provider_type: remote::ollama + config: + host: localhost + port: 11434 + routing_key: Llama3.2-1B-Instruct + - provider_type: meta-reference + config: + model: Llama3.1-8B-Instruct + routing_key: Llama3.1-8B-Instruct + safety: + - routing_key: ["shield1", "shield2"] + provider_type: meta-reference + config: + llama_guard_shield: + model: Llama-Guard-3-1B + excluded_categories: [] + disable_input_check: false + disable_output_check: false + enable_prompt_guard: false + memory: + - routing_key: vector + provider_type: meta-reference + config: {{}} + api_providers: + telemetry: + provider_type: noop + config: {{}} + """.format(built_at=datetime.now().isoformat()) ) - with patch("builtins.open", MagicMock()): - with patch("yaml.safe_load") as mock_yaml_load: - mock_yaml_load.return_value = {"name": "test_build", "image_type": "conda"} - mock_build_config.return_value = MagicMock() - stack_build._run_stack_build_command(args) - - mock_build_config.assert_called_once() - mock_build_image.assert_called_once() +@pytest.fixture +def invalid_config(): + return yaml.safe_load(""" + routing_table: {} + api_providers: {} + """) -@patch("llama_stack.cli.table.print_table") -def test_run_stack_build_command_list_templates(mock_print_table, stack_build): - args = Namespace(list_templates=True) - - stack_build._run_stack_build_command(args) - - mock_print_table.assert_called_once() +def test_parse_and_maybe_upgrade_config_up_to_date(up_to_date_config): + result = parse_and_maybe_upgrade_config(up_to_date_config) + assert result.version == LLAMA_STACK_RUN_CONFIG_VERSION + assert len(result.models) == 1 + assert len(result.shields) == 1 + assert len(result.memory_banks) == 1 + assert "inference" in result.providers -@patch("prompt_toolkit.prompt") -@patch("llama_stack.distribution.datatypes.BuildConfig") -@patch("llama_stack.distribution.build.build_image") -def test_run_stack_build_command_interactive( - mock_build_image, mock_build_config, mock_prompt, stack_build -): - args = Namespace( - config=None, template=None, list_templates=False, name=None, image_type=None +def test_parse_and_maybe_upgrade_config_old_format(old_config): + result = parse_and_maybe_upgrade_config(old_config) + assert result.version == LLAMA_STACK_RUN_CONFIG_VERSION + assert len(result.models) == 2 + assert len(result.shields) == 2 + assert len(result.memory_banks) == 1 + assert all( + api in result.providers + for api in ["inference", "safety", "memory", "telemetry"] ) + safety_provider = result.providers["safety"][0] + assert safety_provider.provider_type == "meta-reference" + assert "llama_guard_shield" in safety_provider.config - mock_prompt.side_effect = [ - "test_name", - "conda", - "meta-reference", - "test description", - ] - mock_build_config.return_value = MagicMock() + inference_providers = result.providers["inference"] + assert len(inference_providers) == 2 + assert set(x.provider_id for x in inference_providers) == { + "remote::ollama-00", + "meta-reference-01", + } - stack_build._run_stack_build_command(args) - - assert mock_prompt.call_count == 4 - mock_build_config.assert_called_once() - mock_build_image.assert_called_once() + ollama = inference_providers[0] + assert ollama.provider_type == "remote::ollama" + assert ollama.config["port"] == 11434 -@patch("llama_stack.distribution.datatypes.BuildConfig") -@patch("llama_stack.distribution.build.build_image") -def 
test_run_stack_build_command_with_template( - mock_build_image, mock_build_config, stack_build -): - args = Namespace( - config=None, - template="test_template", - list_templates=False, - name="test_name", - image_type="docker", - ) - - with patch("builtins.open", MagicMock()): - with patch("yaml.safe_load") as mock_yaml_load: - mock_yaml_load.return_value = {"name": "test_build", "image_type": "conda"} - mock_build_config.return_value = MagicMock() - - stack_build._run_stack_build_command(args) - - mock_build_config.assert_called_once() - mock_build_image.assert_called_once() +def test_parse_and_maybe_upgrade_config_invalid(invalid_config): + with pytest.raises(ValueError): + parse_and_maybe_upgrade_config(invalid_config) diff --git a/llama_stack/distribution/configure.py b/llama_stack/distribution/configure.py index d678a2e00..1fdde3092 100644 --- a/llama_stack/distribution/configure.py +++ b/llama_stack/distribution/configure.py @@ -20,7 +20,6 @@ from prompt_toolkit import prompt from prompt_toolkit.validation import Validator from termcolor import cprint -from llama_stack.apis.memory.memory import MemoryBankType from llama_stack.distribution.distribution import ( builtin_automatically_routed_apis, get_provider_registry, @@ -177,9 +176,6 @@ def configure_api_providers( ) config.routing_table[api_str] = routing_entries - config.api_providers[api_str] = PlaceholderProviderConfig( - providers=p if isinstance(p, list) else [p] - ) else: config.api_providers[api_str] = GenericProviderConfig( provider_type=p, @@ -189,3 +185,102 @@ def configure_api_providers( print("") return config + + +def upgrade_from_routing_table_to_registry( + config_dict: Dict[str, Any], +) -> Dict[str, Any]: + def get_providers(entries): + return [ + Provider( + provider_id=f"{entry['provider_type']}-{i:02d}", + provider_type=entry["provider_type"], + config=entry["config"], + ) + for i, entry in enumerate(entries) + ] + + providers_by_api = {} + models = [] + shields = [] + memory_banks = [] + + routing_table = config_dict["routing_table"] + for api_str, entries in routing_table.items(): + providers = get_providers(entries) + providers_by_api[api_str] = providers + + if api_str == "inference": + for entry, provider in zip(entries, providers): + key = entry["routing_key"] + keys = key if isinstance(key, list) else [key] + for key in keys: + models.append( + ModelDef( + identifier=key, + provider_id=provider.provider_id, + llama_model=key, + ) + ) + elif api_str == "safety": + for entry, provider in zip(entries, providers): + key = entry["routing_key"] + keys = key if isinstance(key, list) else [key] + for key in keys: + shields.append( + ShieldDef( + identifier=key, + type=ShieldType.llama_guard.value, + provider_id=provider.provider_id, + ) + ) + elif api_str == "memory": + for entry, provider in zip(entries, providers): + key = entry["routing_key"] + keys = key if isinstance(key, list) else [key] + for key in keys: + # we currently only support Vector memory banks so this is OK + memory_banks.append( + VectorMemoryBankDef( + identifier=key, + provider_id=provider.provider_id, + embedding_model="all-MiniLM-L6-v2", + chunk_size_in_tokens=512, + ) + ) + config_dict["models"] = models + config_dict["shields"] = shields + config_dict["memory_banks"] = memory_banks + + if "api_providers" in config_dict: + for api_str, provider in config_dict["api_providers"].items(): + if isinstance(provider, dict): + providers_by_api[api_str] = [ + Provider( + provider_id=f"{provider['provider_type']}-00", + 
provider_type=provider["provider_type"], + config=provider["config"], + ) + ] + + config_dict["providers"] = providers_by_api + + del config_dict["routing_table"] + del config_dict["api_providers"] + + return config_dict + + +def parse_and_maybe_upgrade_config(config_dict: Dict[str, Any]) -> StackRunConfig: + version = config_dict.get("version", None) + if version == LLAMA_STACK_RUN_CONFIG_VERSION: + return StackRunConfig(**config_dict) + + if "models" not in config_dict: + print("Upgrading config...") + config_dict = upgrade_from_routing_table_to_registry(config_dict) + + config_dict["version"] = LLAMA_STACK_RUN_CONFIG_VERSION + config_dict["built_at"] = datetime.now().isoformat() + + return StackRunConfig(**config_dict) diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index 09778a761..bccb7d705 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -11,10 +11,13 @@ from typing import Dict, List, Optional, Union from pydantic import BaseModel, Field from llama_stack.providers.datatypes import * # noqa: F403 +from llama_stack.apis.models import * # noqa: F403 +from llama_stack.apis.shields import * # noqa: F403 +from llama_stack.apis.memory_banks import * # noqa: F403 -LLAMA_STACK_BUILD_CONFIG_VERSION = "v1" -LLAMA_STACK_RUN_CONFIG_VERSION = "v1" +LLAMA_STACK_BUILD_CONFIG_VERSION = "2" +LLAMA_STACK_RUN_CONFIG_VERSION = "2" RoutingKey = Union[str, List[str]] @@ -29,12 +32,6 @@ class RoutableProviderConfig(GenericProviderConfig): routing_key: RoutingKey -class PlaceholderProviderConfig(BaseModel): - """Placeholder provider config for API whose provider are defined in routing_table""" - - providers: List[str] - - # Example: /inference, /safety class AutoRoutedProviderSpec(ProviderSpec): provider_type: str = "router" @@ -53,18 +50,16 @@ class AutoRoutedProviderSpec(ProviderSpec): # Example: /models, /shields -@json_schema_type class RoutingTableProviderSpec(ProviderSpec): provider_type: str = "routing_table" config_class: str = "" docker_image: Optional[str] = None - inner_specs: List[ProviderSpec] + router_api: Api module: str pip_packages: List[str] = Field(default_factory=list) -@json_schema_type class DistributionSpec(BaseModel): description: Optional[str] = Field( default="", @@ -80,7 +75,12 @@ in the runtime configuration to help route to the correct provider.""", ) -@json_schema_type +class Provider(BaseModel): + provider_id: str + provider_type: str + config: Dict[str, Any] + + class StackRunConfig(BaseModel): version: str = LLAMA_STACK_RUN_CONFIG_VERSION built_at: datetime @@ -105,31 +105,37 @@ this could be just a hash The list of APIs to serve. If not specified, all APIs specified in the provider_map will be served""", ) - api_providers: Dict[ - str, Union[GenericProviderConfig, PlaceholderProviderConfig] - ] = Field( - description=""" -Provider configurations for each of the APIs provided by this package. -""", - ) - routing_table: Dict[str, List[RoutableProviderConfig]] = Field( - default_factory=dict, - description=""" + providers: Dict[str, List[Provider]] - E.g. 
The following is a ProviderRoutingEntry for models: - - routing_key: Llama3.1-8B-Instruct - provider_type: meta-reference - config: - model: Llama3.1-8B-Instruct - quantization: null - torch_seed: null - max_seq_len: 4096 - max_batch_size: 1 - """, - ) + models: List[ModelDef] + memory_banks: List[MemoryBankDef] + shields: List[ShieldDef] + + +# api_providers: Dict[ +# str, Union[GenericProviderConfig, PlaceholderProviderConfig] +# ] = Field( +# description=""" +# Provider configurations for each of the APIs provided by this package. +# """, +# ) +# routing_table: Dict[str, List[RoutableProviderConfig]] = Field( +# default_factory=dict, +# description=""" + +# E.g. The following is a ProviderRoutingEntry for models: +# - routing_key: Llama3.1-8B-Instruct +# provider_type: meta-reference +# config: +# model: Llama3.1-8B-Instruct +# quantization: null +# torch_seed: null +# max_seq_len: 4096 +# max_batch_size: 1 +# """, +# ) -@json_schema_type class BuildConfig(BaseModel): version: str = LLAMA_STACK_BUILD_CONFIG_VERSION name: str diff --git a/llama_stack/distribution/inspect.py b/llama_stack/distribution/inspect.py index acd7ab7f8..07a851e78 100644 --- a/llama_stack/distribution/inspect.py +++ b/llama_stack/distribution/inspect.py @@ -6,15 +6,19 @@ from typing import Dict, List from llama_stack.apis.inspect import * # noqa: F403 - +from pydantic import BaseModel from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.server.endpoints import get_all_api_endpoints from llama_stack.providers.datatypes import * # noqa: F403 -def is_passthrough(spec: ProviderSpec) -> bool: - return isinstance(spec, RemoteProviderSpec) and spec.adapter is None +class DistributionInspectConfig(BaseModel): + pass + + +def get_provider_impl(*args, **kwargs): + return DistributionInspectImpl() class DistributionInspectImpl(Inspect): diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index ae7d9ab40..ec8374290 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -12,138 +12,187 @@ from llama_stack.distribution.distribution import ( builtin_automatically_routed_apis, get_provider_registry, ) -from llama_stack.distribution.inspect import DistributionInspectImpl from llama_stack.distribution.utils.dynamic import instantiate_class_type +# TODO: make all this naming far less atrocious. Provider. ProviderSpec. ProviderWithSpec. WTF! 
+class ProviderWithSpec(Provider): + spec: ProviderSpec + + async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, Any]: """ Does two things: - flatmaps, sorts and resolves the providers in dependency order - for each API, produces either a (local, passthrough or router) implementation """ - all_providers = get_provider_registry() - specs = {} - configs = {} + all_api_providers = get_provider_registry() - for api_str, config in run_config.api_providers.items(): + auto_routed_apis = builtin_automatically_routed_apis() + providers_with_specs = {} + + for api_str, instances in run_config.providers.items(): api = Api(api_str) - - # TODO: check that these APIs are not in the routing table part of the config - providers = all_providers[api] - - # skip checks for API whose provider config is specified in routing_table - if isinstance(config, PlaceholderProviderConfig): - continue - - if config.provider_type not in providers: + if api in [a.routing_table_api for a in auto_routed_apis]: raise ValueError( - f"Provider `{config.provider_type}` is not available for API `{api}`" + f"Provider for `{api_str}` is automatically provided and cannot be overridden" ) - specs[api] = providers[config.provider_type] - configs[api] = config + + providers_with_specs[api] = {} + for config in instances: + if config.provider_type not in all_api_providers[api]: + raise ValueError( + f"Provider `{config.provider_type}` is not available for API `{api}`" + ) + + spec = ProviderWithSpec( + spec=all_api_providers[api][config.provider_type], + **config, + ) + providers_with_specs[api][spec.provider_id] = spec apis_to_serve = run_config.apis_to_serve or set( - list(specs.keys()) + list(run_config.routing_table.keys()) + list(providers_with_specs.keys()) + + [a.routing_table_api.value for a in auto_routed_apis] ) for info in builtin_automatically_routed_apis(): - source_api = info.routing_table_api - - assert ( - source_api not in specs - ), f"Routing table API {source_api} specified in wrong place?" - assert ( - info.router_api not in specs - ), f"Auto-routed API {info.router_api} specified in wrong place?" - if info.router_api.value not in apis_to_serve: continue - if info.router_api.value not in run_config.routing_table: - raise ValueError(f"Routing table for `{source_api.value}` is not provided?") + if info.routing_table_api.value not in run_config: + raise ValueError( + f"Registry for `{info.routing_table_api.value}` is not provided?" + ) - routing_table = run_config.routing_table[info.router_api.value] + available_providers = providers_with_specs[info.router_api] - providers = all_providers[info.router_api] - - inner_specs = [] inner_deps = [] - for rt_entry in routing_table: - if rt_entry.provider_type not in providers: + registry = run_config[info.routing_table_api.value] + for entry in registry: + if entry.provider_id not in available_providers: raise ValueError( - f"Provider `{rt_entry.provider_type}` is not available for API `{api}`" + f"Provider `{entry.provider_id}` not found. 
Available providers: {list(available_providers.keys())}" ) - inner_specs.append(providers[rt_entry.provider_type]) - inner_deps.extend(providers[rt_entry.provider_type].api_dependencies) - specs[source_api] = RoutingTableProviderSpec( - api=source_api, - module="llama_stack.distribution.routers", - api_dependencies=inner_deps, - inner_specs=inner_specs, + provider = available_providers[entry.provider_id] + inner_deps.extend(provider.spec.api_dependencies) + + providers_with_specs[info.routing_table_api] = { + "__builtin__": [ + ProviderWithSpec( + provider_id="__builtin__", + provider_type="__builtin__", + config=registry, + spec=RoutingTableProviderSpec( + api=info.routing_table_api, + router_api=info.router_api, + module="llama_stack.distribution.routers", + api_dependencies=inner_deps, + ), + ) + ] + } + + providers_with_specs[info.router_api] = { + "__builtin__": [ + ProviderWithSpec( + provider_id="__builtin__", + provider_type="__builtin__", + config={}, + spec=AutoRoutedProviderSpec( + api=info.router_api, + module="llama_stack.distribution.routers", + routing_table_api=source_api, + api_dependencies=[source_api], + ), + ) + ] + } + + sorted_providers = topological_sort(providers_with_specs) + sorted_providers.append( + ProviderWithSpec( + provider_id="__builtin__", + provider_type="__builtin__", + config={}, + spec=InlineProviderSpec( + api=Api.inspect, + provider_type="__builtin__", + config_class="llama_stack.distribution.inspect.DistributionInspectConfig", + module="llama_stack.distribution.inspect", + ), ) - configs[source_api] = routing_table - - specs[info.router_api] = AutoRoutedProviderSpec( - api=info.router_api, - module="llama_stack.distribution.routers", - routing_table_api=source_api, - api_dependencies=[source_api], - ) - configs[info.router_api] = {} - - sorted_specs = topological_sort(specs.values()) - print(f"Resolved {len(sorted_specs)} providers in topological order") - for spec in sorted_specs: - print(f" {spec.api}: {spec.provider_type}") - print("") - impls = {} - for spec in sorted_specs: - api = spec.api - deps = {api: impls[api] for api in spec.api_dependencies} - impl = await instantiate_provider(spec, deps, configs[api]) - - impls[api] = impl - - impls[Api.inspect] = DistributionInspectImpl() - specs[Api.inspect] = InlineProviderSpec( - api=Api.inspect, - provider_type="__distribution_builtin__", - config_class="", - module="", ) - return impls, specs + print(f"Resolved {len(sorted_providers)} providers in topological order") + for provider in sorted_providers: + print( + f" {provider.spec.api}: ({provider.provider_id}) {provider.spec.provider_type}" + ) + print("") + impls = {} + + impls_by_provider_id = {} + for provider in sorted_providers: + api = provider.spec.api + if api not in impls_by_provider_id: + impls_by_provider_id[api] = {} + + deps = {api: impls[api] for api in provider.spec.api_dependencies} + + inner_impls = {} + if isinstance(provider.spec, RoutingTableProviderSpec): + for entry in provider.config: + inner_impls[entry.provider_id] = impls_by_provider_id[ + provider.spec.router_api + ][entry.provider_id] + + impl = await instantiate_provider( + provider, + deps, + inner_impls, + ) + + impls[api] = impl + impls_by_provider_id[api][provider.provider_id] = impl + + return impls -def topological_sort(providers: List[ProviderSpec]) -> List[ProviderSpec]: - by_id = {x.api: x for x in providers} +def topological_sort( + providers_with_specs: Dict[Api, List[ProviderWithSpec]], +) -> List[ProviderWithSpec]: + def dfs(kv, visited: Set[Api], stack: 
List[Api]): + api, providers = kv + visited.add(api) - def dfs(a: ProviderSpec, visited: Set[Api], stack: List[Api]): - visited.add(a.api) - - for api in a.api_dependencies: + deps = [dep for x in providers for dep in x.api_dependencies] + for api in deps: if api not in visited: - dfs(by_id[api], visited, stack) + dfs((api, providers_with_specs[api]), visited, stack) - stack.append(a.api) + stack.append(api) visited = set() stack = [] - for a in providers: - if a.api not in visited: - dfs(a, visited, stack) + for api, providers in providers_with_specs.items(): + if api not in visited: + dfs((api, providers), visited, stack) - return [by_id[x] for x in stack] + flattened = [] + for api in stack: + flattened.extend(providers_with_specs[api]) + return flattened # returns a class implementing the protocol corresponding to the Api async def instantiate_provider( - provider_spec: ProviderSpec, + provider: ProviderWithSpec, deps: Dict[str, Any], - provider_config: Union[GenericProviderConfig, RoutingTable], + inner_impls: Dict[str, Any], ): + provider_spec = provider.spec module = importlib.import_module(provider_spec.module) args = [] @@ -165,21 +214,11 @@ async def instantiate_provider( elif isinstance(provider_spec, RoutingTableProviderSpec): method = "get_routing_table_impl" - assert isinstance(provider_config, List) - routing_table = provider_config - - inner_specs = {x.provider_type: x for x in provider_spec.inner_specs} - inner_impls = [] - for routing_entry in routing_table: - impl = await instantiate_provider( - inner_specs[routing_entry.provider_type], - deps, - routing_entry, - ) - inner_impls.append((routing_entry.routing_key, impl)) + assert isinstance(provider_config, list) + registry = provider_config config = None - args = [provider_spec.api, inner_impls, routing_table, deps] + args = [provider_spec.api, registry, inner_impls, deps] else: method = "get_provider_impl" diff --git a/llama_stack/distribution/routers/__init__.py b/llama_stack/distribution/routers/__init__.py index 363c863aa..0464ab57a 100644 --- a/llama_stack/distribution/routers/__init__.py +++ b/llama_stack/distribution/routers/__init__.py @@ -4,23 +4,24 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Any, List, Tuple +from typing import Any, List from llama_stack.distribution.datatypes import * # noqa: F403 +from .routing_tables import ( + MemoryBanksRoutingTable, + ModelsRoutingTable, + RoutableObject, + RoutedProtocol, + ShieldsRoutingTable, +) async def get_routing_table_impl( api: Api, - inner_impls: List[Tuple[str, Any]], - routing_table_config: Dict[str, List[RoutableProviderConfig]], + registry: List[RoutableObject], + impls_by_provider_id: Dict[str, RoutedProtocol], _deps, ) -> Any: - from .routing_tables import ( - MemoryBanksRoutingTable, - ModelsRoutingTable, - ShieldsRoutingTable, - ) - api_to_tables = { "memory_banks": MemoryBanksRoutingTable, "models": ModelsRoutingTable, @@ -29,7 +30,7 @@ async def get_routing_table_impl( if api.value not in api_to_tables: raise ValueError(f"API {api.value} not found in router map") - impl = api_to_tables[api.value](inner_impls, routing_table_config) + impl = api_to_tables[api.value](registry, impls_by_provider_id) await impl.initialize() return impl diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index e5db17edc..01d92ff12 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -4,141 +4,106 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, List, Optional, Tuple +from typing import Any, List, Optional, Union -from llama_models.sku_list import resolve_model from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_stack.apis.models import * # noqa: F403 from llama_stack.apis.shields import * # noqa: F403 from llama_stack.apis.memory_banks import * # noqa: F403 +from llama_stack.apis.inference import Inference +from llama_stack.apis.memory import Memory +from llama_stack.apis.safety import Safety from llama_stack.distribution.datatypes import * # noqa: F403 +RoutableObject = Union[ + ModelDef, + ShieldDef, + MemoryBankDef, +] + +RoutedProtocol = Union[ + Inference, + Safety, + Memory, +] + + class CommonRoutingTableImpl(RoutingTable): def __init__( self, - inner_impls: List[Tuple[RoutingKey, Any]], - routing_table_config: Dict[str, List[RoutableProviderConfig]], + registry: List[RoutableObject], + impls_by_provider_id: Dict[str, RoutedProtocol], ) -> None: - self.unique_providers = [] - self.providers = {} - self.routing_keys = [] + for obj in registry: + if obj.provider_id not in impls_by_provider_id: + raise ValueError( + f"Provider `{obj.provider_id}` pointed by `{obj.identifier}` not found" + ) - for key, impl in inner_impls: - keys = key if isinstance(key, list) else [key] - self.unique_providers.append((keys, impl)) - - for k in keys: - if k in self.providers: - raise ValueError(f"Duplicate routing key {k}") - self.providers[k] = impl - self.routing_keys.append(k) - - self.routing_table_config = routing_table_config + self.impls_by_provider_id = impls_by_provider_id + self.registry = registry async def initialize(self) -> None: - for keys, p in self.unique_providers: + keys_by_provider = {} + for obj in self.registry: + keys = keys_by_provider.setdefault(obj.provider_id, []) + keys.append(obj.routing_key) + + for provider_id, keys in keys_by_provider.items(): + p = self.impls_by_provider_id[provider_id] spec = p.__provider_spec__ - if isinstance(spec, RemoteProviderSpec) and spec.adapter is None: + if is_passthrough(spec): continue await p.validate_routing_keys(keys) 
async def shutdown(self) -> None: - for _, p in self.unique_providers: - await p.shutdown() + pass def get_provider_impl(self, routing_key: str) -> Any: - if routing_key not in self.providers: + if routing_key not in self.routing_key_to_object: raise ValueError(f"Could not find provider for {routing_key}") - return self.providers[routing_key] + obj = self.routing_key_to_object[routing_key] + return self.impls_by_provider_id[obj.provider_id] - def get_routing_keys(self) -> List[str]: - return self.routing_keys - - def get_provider_config(self, routing_key: str) -> Optional[GenericProviderConfig]: - for entry in self.routing_table_config: - if entry.routing_key == routing_key: - return entry + def get_object_by_identifier(self, identifier: str) -> Optional[RoutableObject]: + for obj in self.registry: + if obj.identifier == identifier: + return obj return None class ModelsRoutingTable(CommonRoutingTableImpl, Models): + async def list_models(self) -> List[ModelDef]: + return self.registry - async def list_models(self) -> List[ModelServingSpec]: - specs = [] - for entry in self.routing_table_config: - model_id = entry.routing_key - specs.append( - ModelServingSpec( - llama_model=resolve_model(model_id), - provider_config=entry, - ) - ) - return specs + async def get_model(self, identifier: str) -> Optional[ModelDef]: + return self.get_object_by_identifier(identifier) - async def get_model(self, core_model_id: str) -> Optional[ModelServingSpec]: - for entry in self.routing_table_config: - if entry.routing_key == core_model_id: - return ModelServingSpec( - llama_model=resolve_model(core_model_id), - provider_config=entry, - ) - return None + async def register_model(self, model: ModelDef) -> None: + raise NotImplementedError() class ShieldsRoutingTable(CommonRoutingTableImpl, Shields): + async def list_shields(self) -> List[ShieldDef]: + return self.registry - async def list_shields(self) -> List[ShieldSpec]: - specs = [] - for entry in self.routing_table_config: - if isinstance(entry.routing_key, list): - for k in entry.routing_key: - specs.append( - ShieldSpec( - shield_type=k, - provider_config=entry, - ) - ) - else: - specs.append( - ShieldSpec( - shield_type=entry.routing_key, - provider_config=entry, - ) - ) - return specs + async def get_shield(self, shield_type: str) -> Optional[ShieldDef]: + return self.get_object_by_identifier(shield_type) - async def get_shield(self, shield_type: str) -> Optional[ShieldSpec]: - for entry in self.routing_table_config: - if entry.routing_key == shield_type: - return ShieldSpec( - shield_type=entry.routing_key, - provider_config=entry, - ) - return None + async def register_shield(self, shield: ShieldDef) -> None: + raise NotImplementedError() class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks): + async def list_memory_banks(self) -> List[MemoryBankDef]: + return self.registry - async def list_available_memory_banks(self) -> List[MemoryBankSpec]: - specs = [] - for entry in self.routing_table_config: - specs.append( - MemoryBankSpec( - bank_type=entry.routing_key, - provider_config=entry, - ) - ) - return specs + async def get_memory_bank(self, identifier: str) -> Optional[MemoryBankDef]: + return self.get_object_by_identifier(identifier) - async def get_serving_memory_bank(self, bank_type: str) -> Optional[MemoryBankSpec]: - for entry in self.routing_table_config: - if entry.routing_key == bank_type: - return MemoryBankSpec( - bank_type=entry.routing_key, - provider_config=entry, - ) - return None + async def register_memory_bank(self, 
bank: MemoryBankDef) -> None: + raise NotImplementedError() diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index 4013264df..f664bb674 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -285,7 +285,7 @@ def main( app = FastAPI() - impls, specs = asyncio.run(resolve_impls_with_routing(config)) + impls = asyncio.run(resolve_impls_with_routing(config)) if Api.telemetry in impls: setup_logger(impls[Api.telemetry]) @@ -303,11 +303,7 @@ def main( endpoints = all_endpoints[api] impl = impls[api] - provider_spec = specs[api] - if ( - isinstance(provider_spec, RemoteProviderSpec) - and provider_spec.adapter is None - ): + if is_passthrough(impl.__provider_spec__): for endpoint in endpoints: url = impl.__provider_config__.url.rstrip("/") + endpoint.route getattr(app, endpoint.method)(endpoint.route)( diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py index a2e8851a2..abc1d601d 100644 --- a/llama_stack/providers/datatypes.py +++ b/llama_stack/providers/datatypes.py @@ -154,6 +154,10 @@ as being "Llama Stack compatible" return None +def is_passthrough(spec: ProviderSpec) -> bool: + return isinstance(spec, RemoteProviderSpec) and spec.adapter is None + + # Can avoid this by using Pydantic computed_field def remote_provider_spec( api: Api, adapter: Optional[AdapterSpec] = None diff --git a/llama_stack/providers/impls/meta_reference/safety/config.py b/llama_stack/providers/impls/meta_reference/safety/config.py index 64a39b3c6..4f6de544b 100644 --- a/llama_stack/providers/impls/meta_reference/safety/config.py +++ b/llama_stack/providers/impls/meta_reference/safety/config.py @@ -9,7 +9,7 @@ from typing import List, Optional from llama_models.sku_list import CoreModelId, safety_models -from pydantic import BaseModel, validator +from pydantic import BaseModel, field_validator class MetaReferenceShieldType(Enum): @@ -25,7 +25,7 @@ class LlamaGuardShieldConfig(BaseModel): disable_input_check: bool = False disable_output_check: bool = False - @validator("model") + @field_validator("model") @classmethod def validate_model(cls, model: str) -> str: permitted_models = [ From 8d157a819773a27816e9e04e3638769501b5531c Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sat, 5 Oct 2024 09:04:50 -0700 Subject: [PATCH 34/69] rename --- ...st_stack_build.py => test_stack_config.py} | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) rename llama_stack/cli/tests/{test_stack_build.py => test_stack_config.py} (92%) diff --git a/llama_stack/cli/tests/test_stack_build.py b/llama_stack/cli/tests/test_stack_config.py similarity index 92% rename from llama_stack/cli/tests/test_stack_build.py rename to llama_stack/cli/tests/test_stack_config.py index b04e80317..0dd343930 100644 --- a/llama_stack/cli/tests/test_stack_build.py +++ b/llama_stack/cli/tests/test_stack_config.py @@ -1,9 +1,16 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from datetime import datetime + import pytest import yaml -from datetime import datetime from llama_stack.distribution.configure import ( - parse_and_maybe_upgrade_config, LLAMA_STACK_RUN_CONFIG_VERSION, + parse_and_maybe_upgrade_config, ) @@ -90,16 +97,20 @@ def old_config(): telemetry: provider_type: noop config: {{}} - """.format(built_at=datetime.now().isoformat()) + """.format( + built_at=datetime.now().isoformat() + ) ) @pytest.fixture def invalid_config(): - return yaml.safe_load(""" + return yaml.safe_load( + """ routing_table: {} api_providers: {} - """) + """ + ) def test_parse_and_maybe_upgrade_config_up_to_date(up_to_date_config): From 5a7b01d292e778ac76155885c914d5481f7874e5 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sat, 5 Oct 2024 11:12:46 -0700 Subject: [PATCH 35/69] Significantly upgrade the interactive configuration experience --- llama_stack/cli/stack/build.py | 3 - llama_stack/cli/stack/configure.py | 7 +- llama_stack/distribution/configure.py | 346 +++++++++++------- llama_stack/distribution/datatypes.py | 3 +- .../impls/meta_reference/safety/config.py | 2 - .../impls/meta_reference/safety/safety.py | 2 - .../safety/shields/llama_guard.py | 10 - 7 files changed, 217 insertions(+), 156 deletions(-) diff --git a/llama_stack/cli/stack/build.py b/llama_stack/cli/stack/build.py index 95df6a737..0cedbe901 100644 --- a/llama_stack/cli/stack/build.py +++ b/llama_stack/cli/stack/build.py @@ -150,9 +150,6 @@ class StackBuild(Subcommand): def _run_template_list_cmd(self, args: argparse.Namespace) -> None: import json - - import yaml - from llama_stack.cli.table import print_table # eventually, this should query a registry at llama.meta.com/llamastack/distributions diff --git a/llama_stack/cli/stack/configure.py b/llama_stack/cli/stack/configure.py index e1b0aa39f..76ade470e 100644 --- a/llama_stack/cli/stack/configure.py +++ b/llama_stack/cli/stack/configure.py @@ -148,14 +148,17 @@ class StackConfigure(Subcommand): "yellow", attrs=["bold"], ) - config_dict = yaml.safe_load(config_file.read_text()) + config_dict = yaml.safe_load(run_config_file.read_text()) config = parse_and_maybe_upgrade_config(config_dict) else: config = StackRunConfig( built_at=datetime.now(), image_name=image_name, apis_to_serve=[], - api_providers={}, + providers={}, + models=[], + shields=[], + memory_banks=[], ) config = configure_api_providers(config, build_config.distribution_spec) diff --git a/llama_stack/distribution/configure.py b/llama_stack/distribution/configure.py index 1fdde3092..b40cff242 100644 --- a/llama_stack/distribution/configure.py +++ b/llama_stack/distribution/configure.py @@ -3,6 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+import textwrap from typing import Any @@ -14,7 +15,6 @@ from llama_models.sku_list import ( safety_models, ) -from pydantic import BaseModel from llama_stack.distribution.datatypes import * # noqa: F403 from prompt_toolkit import prompt from prompt_toolkit.validation import Validator @@ -23,14 +23,14 @@ from termcolor import cprint from llama_stack.distribution.distribution import ( builtin_automatically_routed_apis, get_provider_registry, - stack_apis, ) from llama_stack.distribution.utils.dynamic import instantiate_class_type - from llama_stack.distribution.utils.prompt_for_config import prompt_for_config -from llama_stack.providers.impls.meta_reference.safety.config import ( - MetaReferenceShieldType, -) + + +from llama_stack.apis.models import * # noqa: F403 +from llama_stack.apis.shields import * # noqa: F403 +from llama_stack.apis.memory_banks import * # noqa: F403 ALLOWED_MODELS = ( @@ -38,162 +38,233 @@ ALLOWED_MODELS = ( ) -def make_routing_entry_type(config_class: Any): - class BaseModelWithConfig(BaseModel): - routing_key: str - config: config_class +def configure_single_provider( + registry: Dict[str, ProviderSpec], provider: Provider +) -> Provider: + provider_spec = registry[provider.provider_type] + config_type = instantiate_class_type(provider_spec.config_class) + try: + if provider.config: + existing = config_type(**provider.config) + else: + existing = None + except Exception: + existing = None - return BaseModelWithConfig + cfg = prompt_for_config(config_type, existing) + return Provider( + provider_id=provider.provider_id, + provider_type=provider.provider_type, + config=cfg.dict(), + ) -def get_builtin_apis(provider_backed_apis: List[str]) -> List[str]: - """Get corresponding builtin APIs given provider backed APIs""" - res = [] - for inf in builtin_automatically_routed_apis(): - if inf.router_api.value in provider_backed_apis: - res.append(inf.routing_table_api.value) - - return res - - -# TODO: make sure we can deal with existing configuration values correctly -# instead of just overwriting them def configure_api_providers( - config: StackRunConfig, spec: DistributionSpec + config: StackRunConfig, build_spec: DistributionSpec ) -> StackRunConfig: - apis = config.apis_to_serve or list(spec.providers.keys()) - # append the bulitin routing APIs - apis += get_builtin_apis(apis) + is_nux = len(config.providers) == 0 - router_api2builtin_api = { - inf.router_api.value: inf.routing_table_api.value - for inf in builtin_automatically_routed_apis() - } + apis = set((config.apis_to_serve or list(build_spec.providers.keys()))) + config.apis_to_serve = [a for a in apis if a != "telemetry"] - config.apis_to_serve = list(set([a for a in apis if a != "telemetry"])) + if is_nux: + print( + textwrap.dedent( + """ + Llama Stack is composed of several APIs working together. For each API served by the Stack, + we need to configure the providers (implementations) you want to use for these APIs. 
+""" + ) + ) - apis = [v.value for v in stack_apis()] - all_providers = get_provider_registry() - - # configure simple case for with non-routing providers to api_providers - for api_str in spec.providers.keys(): - if api_str not in apis: + provider_registry = get_provider_registry() + builtin_apis = [a.routing_table_api for a in builtin_automatically_routed_apis()] + for api_str in config.apis_to_serve: + api = Api(api_str) + if api in builtin_apis: + continue + if api not in provider_registry: raise ValueError(f"Unknown API `{api_str}`") - cprint(f"Configuring API `{api_str}`...", "green", attrs=["bold"]) - api = Api(api_str) - - p = spec.providers[api_str] - cprint(f"=== Configuring provider `{p}` for API {api_str}...", "green") - - if isinstance(p, list): + existing_providers = config.providers.get(api_str, []) + if existing_providers: cprint( - f"[WARN] Interactive configuration of multiple providers {p} is not supported, configuring {p[0]} only, please manually configure {p[1:]} in routing_table of run.yaml", - "yellow", + f"Re-configuring existing providers for API `{api_str}`...", + "green", + attrs=["bold"], ) - p = p[0] - - provider_spec = all_providers[api][p] - config_type = instantiate_class_type(provider_spec.config_class) - try: - provider_config = config.api_providers.get(api_str) - if provider_config: - existing = config_type(**provider_config.config) - else: - existing = None - except Exception: - existing = None - cfg = prompt_for_config(config_type, existing) - - if api_str in router_api2builtin_api: - # a routing api, we need to infer and assign it a routing_key and put it in the routing_table - routing_key = "" - routing_entries = [] - if api_str == "inference": - if hasattr(cfg, "model"): - routing_key = cfg.model - else: - routing_key = prompt( - "> Please enter the supported model your provider has for inference: ", - default="Llama3.1-8B-Instruct", - validator=Validator.from_callable( - lambda x: resolve_model(x) is not None, - error_message="Model must be: {}".format( - [x.descriptor() for x in ALLOWED_MODELS] - ), - ), - ) - routing_entries.append( - RoutableProviderConfig( - routing_key=routing_key, - provider_type=p, - config=cfg.dict(), - ) + updated_providers = [] + for p in existing_providers: + print(f"> Configuring provider `({p.provider_type})`") + updated_providers.append( + configure_single_provider(provider_registry[api], p) ) - - if api_str == "safety": - # TODO: add support for other safety providers, and simplify safety provider config - if p == "meta-reference": - routing_entries.append( - RoutableProviderConfig( - routing_key=[s.value for s in MetaReferenceShieldType], - provider_type=p, - config=cfg.dict(), - ) - ) - else: - cprint( - f"[WARN] Interactive configuration of safety provider {p} is not supported. 
Please look for `{routing_key}` in run.yaml and replace it appropriately.", - "yellow", - attrs=["bold"], - ) - routing_entries.append( - RoutableProviderConfig( - routing_key=routing_key, - provider_type=p, - config=cfg.dict(), - ) - ) - - if api_str == "memory": - bank_types = list([x.value for x in MemoryBankType]) - routing_key = prompt( - "> Please enter the supported memory bank type your provider has for memory: ", - default="vector", - validator=Validator.from_callable( - lambda x: x in bank_types, - error_message="Invalid provider, please enter one of the following: {}".format( - bank_types - ), - ), - ) - routing_entries.append( - RoutableProviderConfig( - routing_key=routing_key, - provider_type=p, - config=cfg.dict(), - ) - ) - - config.routing_table[api_str] = routing_entries + print("") else: - config.api_providers[api_str] = GenericProviderConfig( - provider_type=p, - config=cfg.dict(), - ) + # we are newly configuring this API + plist = build_spec.providers.get(api_str, []) + plist = plist if isinstance(plist, list) else [plist] + if not plist: + raise ValueError(f"No provider configured for API {api_str}?") + + cprint(f"Configuring API `{api_str}`...", "green", attrs=["bold"]) + updated_providers = [] + for i, provider_type in enumerate(plist): + print(f"> Configuring provider `({provider_type})`") + updated_providers.append( + configure_single_provider( + provider_registry[api], + Provider( + provider_id=( + f"{provider_type}-{i:02d}" + if len(plist) > 1 + else provider_type + ), + provider_type=provider_type, + config={}, + ), + ) + ) + print("") + + config.providers[api_str] = updated_providers + + if is_nux: + print( + textwrap.dedent( + """ + ========================================================================================= + Now let's configure the `objects` you will be serving via the stack. These are: + + - Models: the Llama model SKUs you expect to inference (e.g., Llama3.2-1B-Instruct) + - Shields: the safety models you expect to use for safety (e.g., Llama-Guard-3-1B) + - Memory Banks: the memory banks you expect to use for memory (e.g., Vector stores) + + This wizard will guide you through setting up one of each of these objects. You can + always add more later by editing the run.yaml file. + """ + ) + ) + + object_types = { + "models": (ModelDef, configure_models, "inference"), + "shields": (ShieldDef, configure_shields, "safety"), + "memory_banks": (MemoryBankDef, configure_memory_banks, "memory"), + } + safety_providers = config.providers["safety"] + + for otype, (odef, config_method, api_str) in object_types.items(): + existing_objects = getattr(config, otype) + + if existing_objects: + cprint( + f"{len(existing_objects)} {otype} exist. 
Skipping...", + "blue", + attrs=["bold"], + ) + updated_objects = existing_objects + else: + # we are newly configuring this API + cprint(f"Configuring `{otype}`...", "blue", attrs=["bold"]) + updated_objects = config_method(config.providers[api_str], safety_providers) + + setattr(config, otype, updated_objects) print("") return config +def get_llama_guard_model(safety_providers: List[Provider]) -> Optional[str]: + if not safety_providers: + return None + + provider = safety_providers[0] + assert provider.provider_type == "meta-reference" + + cfg = provider.config["llama_guard_shield"] + if not cfg: + return None + return cfg["model"] + + +def configure_models( + providers: List[Provider], safety_providers: List[Provider] +) -> List[ModelDef]: + model = prompt( + "> Please enter the model you want to serve: ", + default="Llama3.2-1B-Instruct", + validator=Validator.from_callable( + lambda x: resolve_model(x) is not None, + error_message="Model must be: {}".format( + [x.descriptor() for x in ALLOWED_MODELS] + ), + ), + ) + model = ModelDef( + identifier=model, + llama_model=model, + provider_id=providers[0].provider_id, + ) + + ret = [model] + if llama_guard := get_llama_guard_model(safety_providers): + ret.append( + ModelDef( + identifier=llama_guard, + llama_model=llama_guard, + provider_id=providers[0].provider_id, + ) + ) + + return ret + + +def configure_shields( + providers: List[Provider], safety_providers: List[Provider] +) -> List[ShieldDef]: + if get_llama_guard_model(safety_providers): + return [ + ShieldDef( + identifier="llama_guard", + type="llama_guard", + provider_id=providers[0].provider_id, + params={}, + ) + ] + + return [] + + +def configure_memory_banks( + providers: List[Provider], safety_providers: List[Provider] +) -> List[MemoryBankDef]: + bank_name = prompt( + "> Please enter a name for your memory bank: ", + default="my-memory-bank", + ) + + return [ + VectorMemoryBankDef( + identifier=bank_name, + provider_id=providers[0].provider_id, + embedding_model="all-MiniLM-L6-v2", + chunk_size_in_tokens=512, + ) + ] + + def upgrade_from_routing_table_to_registry( config_dict: Dict[str, Any], ) -> Dict[str, Any]: def get_providers(entries): return [ Provider( - provider_id=f"{entry['provider_type']}-{i:02d}", + provider_id=( + f"{entry['provider_type']}-{i:02d}" + if len(entries) > 1 + else entry["provider_type"] + ), provider_type=entry["provider_type"], config=entry["config"], ) @@ -254,6 +325,9 @@ def upgrade_from_routing_table_to_registry( if "api_providers" in config_dict: for api_str, provider in config_dict["api_providers"].items(): + if api_str in ("inference", "safety", "memory"): + continue + if isinstance(provider, dict): providers_by_api[api_str] = [ Provider( diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index bccb7d705..0ee03175c 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -75,6 +75,7 @@ in the runtime configuration to help route to the correct provider.""", ) +# TODO: rename as ProviderInstanceConfig class Provider(BaseModel): provider_id: str provider_type: str @@ -108,8 +109,8 @@ The list of APIs to serve. 
If not specified, all APIs specified in the provider_ providers: Dict[str, List[Provider]] models: List[ModelDef] - memory_banks: List[MemoryBankDef] shields: List[ShieldDef] + memory_banks: List[MemoryBankDef] # api_providers: Dict[ diff --git a/llama_stack/providers/impls/meta_reference/safety/config.py b/llama_stack/providers/impls/meta_reference/safety/config.py index 4f6de544b..51d2ae2bf 100644 --- a/llama_stack/providers/impls/meta_reference/safety/config.py +++ b/llama_stack/providers/impls/meta_reference/safety/config.py @@ -22,8 +22,6 @@ class MetaReferenceShieldType(Enum): class LlamaGuardShieldConfig(BaseModel): model: str = "Llama-Guard-3-1B" excluded_categories: List[str] = [] - disable_input_check: bool = False - disable_output_check: bool = False @field_validator("model") @classmethod diff --git a/llama_stack/providers/impls/meta_reference/safety/safety.py b/llama_stack/providers/impls/meta_reference/safety/safety.py index 0ac3b6244..bf19a3010 100644 --- a/llama_stack/providers/impls/meta_reference/safety/safety.py +++ b/llama_stack/providers/impls/meta_reference/safety/safety.py @@ -91,8 +91,6 @@ class MetaReferenceSafetyImpl(Safety, RoutableProvider): model=cfg.model, inference_api=self.inference_api, excluded_categories=cfg.excluded_categories, - disable_input_check=cfg.disable_input_check, - disable_output_check=cfg.disable_output_check, ) elif typ == MetaReferenceShieldType.jailbreak_shield: from .shields import JailbreakShield diff --git a/llama_stack/providers/impls/meta_reference/safety/shields/llama_guard.py b/llama_stack/providers/impls/meta_reference/safety/shields/llama_guard.py index f98d95c43..19a20a899 100644 --- a/llama_stack/providers/impls/meta_reference/safety/shields/llama_guard.py +++ b/llama_stack/providers/impls/meta_reference/safety/shields/llama_guard.py @@ -113,8 +113,6 @@ class LlamaGuardShield(ShieldBase): model: str, inference_api: Inference, excluded_categories: List[str] = None, - disable_input_check: bool = False, - disable_output_check: bool = False, on_violation_action: OnViolationAction = OnViolationAction.RAISE, ): super().__init__(on_violation_action) @@ -132,8 +130,6 @@ class LlamaGuardShield(ShieldBase): self.model = model self.inference_api = inference_api self.excluded_categories = excluded_categories - self.disable_input_check = disable_input_check - self.disable_output_check = disable_output_check def check_unsafe_response(self, response: str) -> Optional[str]: match = re.match(r"^unsafe\n(.*)$", response) @@ -180,12 +176,6 @@ class LlamaGuardShield(ShieldBase): async def run(self, messages: List[Message]) -> ShieldResponse: messages = self.validate_messages(messages) - if self.disable_input_check and messages[-1].role == Role.user.value: - return ShieldResponse(is_violation=False) - elif self.disable_output_check and messages[-1].role == Role.assistant.value: - return ShieldResponse( - is_violation=False, - ) if self.model == CoreModelId.llama_guard_3_11b_vision.value: shield_input_message = self.build_vision_shield_input(messages) From 4215cc9331b1daff089241d14c22244dec81ef07 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sat, 5 Oct 2024 22:17:06 -0700 Subject: [PATCH 36/69] Push registration methods onto the backing providers --- llama_stack/apis/agents/agents.py | 2 +- llama_stack/apis/inference/inference.py | 10 + llama_stack/apis/memory/memory.py | 10 + llama_stack/apis/safety/safety.py | 10 + llama_stack/distribution/datatypes.py | 17 ++ llama_stack/distribution/resolver.py | 194 +++++++++--------- 
llama_stack/distribution/routers/__init__.py | 2 - .../distribution/routers/routing_tables.py | 63 +++--- .../adapters/safety/together/together.py | 46 +++-- llama_stack/providers/datatypes.py | 10 - .../impls/meta_reference/memory/faiss.py | 36 ++-- .../impls/meta_reference/safety/config.py | 8 +- .../impls/meta_reference/safety/safety.py | 79 ++++--- .../providers/utils/memory/vector_store.py | 2 +- 14 files changed, 269 insertions(+), 220 deletions(-) diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index d008331d5..f9ad44efc 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -261,7 +261,7 @@ class Session(BaseModel): turns: List[Turn] started_at: datetime - memory_bank: Optional[MemoryBank] = None + memory_bank: Optional[MemoryBankDef] = None class AgentConfigCommon(BaseModel): diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index 428f29b88..5374f2efb 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -14,6 +14,7 @@ from pydantic import BaseModel, Field from typing_extensions import Annotated from llama_models.llama3.api.datatypes import * # noqa: F403 +from llama_stack.apis.models import * # noqa: F403 class LogProbConfig(BaseModel): @@ -203,3 +204,12 @@ class Inference(Protocol): model: str, contents: List[InterleavedTextMedia], ) -> EmbeddingsResponse: ... + + @webmethod(route="/inference/register_model") + async def register_model(self, model: ModelDef) -> None: ... + + @webmethod(route="/inference/list_models") + async def list_models(self) -> List[ModelDef]: ... + + @webmethod(route="/inference/get_model") + async def get_model(self, identifier: str) -> Optional[ModelDef]: ... diff --git a/llama_stack/apis/memory/memory.py b/llama_stack/apis/memory/memory.py index 8ac4a08a6..86dcbbcdc 100644 --- a/llama_stack/apis/memory/memory.py +++ b/llama_stack/apis/memory/memory.py @@ -15,6 +15,7 @@ from llama_models.schema_utils import json_schema_type, webmethod from pydantic import BaseModel, Field from llama_models.llama3.api.datatypes import * # noqa: F403 +from llama_stack.apis.memory_banks import * # noqa: F403 @json_schema_type @@ -76,3 +77,12 @@ class Memory(Protocol): bank_id: str, document_ids: List[str], ) -> None: ... + + @webmethod(route="/memory/register_memory_bank") + async def register_memory_bank(self, memory_bank: MemoryBankDef) -> None: ... + + @webmethod(route="/memory/list_memory_banks") + async def list_memory_banks(self) -> List[MemoryBankDef]: ... + + @webmethod(route="/memory/get_memory_bank") + async def get_memory_bank(self, identifier: str) -> Optional[MemoryBankDef]: ... diff --git a/llama_stack/apis/safety/safety.py b/llama_stack/apis/safety/safety.py index ed3a42f66..a3c94d136 100644 --- a/llama_stack/apis/safety/safety.py +++ b/llama_stack/apis/safety/safety.py @@ -11,6 +11,7 @@ from llama_models.schema_utils import json_schema_type, webmethod from pydantic import BaseModel from llama_models.llama3.api.datatypes import * # noqa: F403 +from llama_stack.apis.shields import * # noqa: F403 @json_schema_type @@ -42,3 +43,12 @@ class Safety(Protocol): async def run_shield( self, shield_type: str, messages: List[Message], params: Dict[str, Any] = None ) -> RunShieldResponse: ... + + @webmethod(route="/safety/register_shield") + async def register_shield(self, shield: ShieldDef) -> None: ... + + @webmethod(route="/safety/list_shields") + async def list_shields(self) -> List[ShieldDef]: ... 
+ + @webmethod(route="/safety/get_shield") + async def get_shield(self, identifier: str) -> Optional[ShieldDef]: ... diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index 0ee03175c..05b2ad0d6 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -14,6 +14,9 @@ from llama_stack.providers.datatypes import * # noqa: F403 from llama_stack.apis.models import * # noqa: F403 from llama_stack.apis.shields import * # noqa: F403 from llama_stack.apis.memory_banks import * # noqa: F403 +from llama_stack.apis.inference import Inference +from llama_stack.apis.memory import Memory +from llama_stack.apis.safety import Safety LLAMA_STACK_BUILD_CONFIG_VERSION = "2" @@ -23,6 +26,19 @@ LLAMA_STACK_RUN_CONFIG_VERSION = "2" RoutingKey = Union[str, List[str]] +RoutableObject = Union[ + ModelDef, + ShieldDef, + MemoryBankDef, +] + +RoutedProtocol = Union[ + Inference, + Safety, + Memory, +] + + class GenericProviderConfig(BaseModel): provider_type: str config: Dict[str, Any] @@ -56,6 +72,7 @@ class RoutingTableProviderSpec(ProviderSpec): docker_image: Optional[str] = None router_api: Api + registry: List[RoutableObject] module: str pip_packages: List[str] = Field(default_factory=list) diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index ec8374290..660d84fc8 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -28,46 +28,48 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An """ all_api_providers = get_provider_registry() - auto_routed_apis = builtin_automatically_routed_apis() + routing_table_apis = set( + x.routing_table_api for x in builtin_automatically_routed_apis() + ) + router_apis = set(x.router_api for x in builtin_automatically_routed_apis()) + providers_with_specs = {} - for api_str, instances in run_config.providers.items(): + for api_str, providers in run_config.providers.items(): api = Api(api_str) - if api in [a.routing_table_api for a in auto_routed_apis]: + if api in routing_table_apis: raise ValueError( f"Provider for `{api_str}` is automatically provided and cannot be overridden" ) - providers_with_specs[api] = {} - for config in instances: - if config.provider_type not in all_api_providers[api]: + specs = {} + for provider in providers: + if provider.provider_type not in all_api_providers[api]: raise ValueError( - f"Provider `{config.provider_type}` is not available for API `{api}`" + f"Provider `{provider.provider_type}` is not available for API `{api}`" ) spec = ProviderWithSpec( - spec=all_api_providers[api][config.provider_type], - **config, + spec=all_api_providers[api][provider.provider_type], + **(provider.dict()), ) - providers_with_specs[api][spec.provider_id] = spec + specs[provider.provider_id] = spec + + key = api_str if api not in router_apis else f"inner-{api_str}" + providers_with_specs[key] = specs apis_to_serve = run_config.apis_to_serve or set( - list(providers_with_specs.keys()) - + [a.routing_table_api.value for a in auto_routed_apis] + list(providers_with_specs.keys()) + list(routing_table_apis) ) + for info in builtin_automatically_routed_apis(): if info.router_api.value not in apis_to_serve: continue - if info.routing_table_api.value not in run_config: - raise ValueError( - f"Registry for `{info.routing_table_api.value}` is not provided?" 
- ) - - available_providers = providers_with_specs[info.router_api] + available_providers = providers_with_specs[f"inner-{info.router_api.value}"] inner_deps = [] - registry = run_config[info.routing_table_api.value] + registry = getattr(run_config, info.routing_table_api.value) for entry in registry: if entry.provider_id not in available_providers: raise ValueError( @@ -77,74 +79,70 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An provider = available_providers[entry.provider_id] inner_deps.extend(provider.spec.api_dependencies) - providers_with_specs[info.routing_table_api] = { - "__builtin__": [ - ProviderWithSpec( - provider_id="__builtin__", - provider_type="__builtin__", - config=registry, - spec=RoutingTableProviderSpec( - api=info.routing_table_api, - router_api=info.router_api, - module="llama_stack.distribution.routers", - api_dependencies=inner_deps, - ), - ) - ] + providers_with_specs[info.routing_table_api.value] = { + "__builtin__": ProviderWithSpec( + provider_id="__builtin__", + provider_type="__routing_table__", + config={}, + spec=RoutingTableProviderSpec( + api=info.routing_table_api, + router_api=info.router_api, + registry=registry, + module="llama_stack.distribution.routers", + api_dependencies=inner_deps, + ), + ) } - providers_with_specs[info.router_api] = { - "__builtin__": [ - ProviderWithSpec( - provider_id="__builtin__", - provider_type="__builtin__", - config={}, - spec=AutoRoutedProviderSpec( - api=info.router_api, - module="llama_stack.distribution.routers", - routing_table_api=source_api, - api_dependencies=[source_api], - ), - ) - ] + providers_with_specs[info.router_api.value] = { + "__builtin__": ProviderWithSpec( + provider_id="__builtin__", + provider_type="__autorouted__", + config={}, + spec=AutoRoutedProviderSpec( + api=info.router_api, + module="llama_stack.distribution.routers", + routing_table_api=info.routing_table_api, + api_dependencies=[info.routing_table_api], + ), + ) } - sorted_providers = topological_sort(providers_with_specs) + sorted_providers = topological_sort( + {k: v.values() for k, v in providers_with_specs.items()} + ) sorted_providers.append( - ProviderWithSpec( - provider_id="__builtin__", - provider_type="__builtin__", - config={}, - spec=InlineProviderSpec( - api=Api.inspect, + ( + "inspect", + ProviderWithSpec( + provider_id="__builtin__", provider_type="__builtin__", - config_class="llama_stack.distribution.inspect.DistributionInspectConfig", - module="llama_stack.distribution.inspect", + config={}, + spec=InlineProviderSpec( + api=Api.inspect, + provider_type="__builtin__", + config_class="llama_stack.distribution.inspect.DistributionInspectConfig", + module="llama_stack.distribution.inspect", + ), ), ) ) print(f"Resolved {len(sorted_providers)} providers in topological order") - for provider in sorted_providers: - print( - f" {provider.spec.api}: ({provider.provider_id}) {provider.spec.provider_type}" - ) + for api_str, provider in sorted_providers: + print(f" {api_str}: ({provider.provider_id}) {provider.spec.provider_type}") print("") + impls = {} - - impls_by_provider_id = {} - for provider in sorted_providers: - api = provider.spec.api - if api not in impls_by_provider_id: - impls_by_provider_id[api] = {} - - deps = {api: impls[api] for api in provider.spec.api_dependencies} + inner_impls_by_provider_id = {f"inner-{x}": {} for x in router_apis} + for api_str, provider in sorted_providers: + deps = {a: impls[a] for a in provider.spec.api_dependencies} inner_impls = {} if 
isinstance(provider.spec, RoutingTableProviderSpec): - for entry in provider.config: - inner_impls[entry.provider_id] = impls_by_provider_id[ - provider.spec.router_api + for entry in provider.spec.registry: + inner_impls[entry.provider_id] = inner_impls_by_provider_id[ + f"inner-{provider.spec.router_api.value}" ][entry.provider_id] impl = await instantiate_provider( @@ -152,37 +150,46 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An deps, inner_impls, ) - - impls[api] = impl - impls_by_provider_id[api][provider.provider_id] = impl + if "inner-" in api_str: + inner_impls_by_provider_id[api_str][provider.provider_id] = impl + else: + api = Api(api_str) + impls[api] = impl return impls def topological_sort( - providers_with_specs: Dict[Api, List[ProviderWithSpec]], + providers_with_specs: Dict[str, List[ProviderWithSpec]], ) -> List[ProviderWithSpec]: - def dfs(kv, visited: Set[Api], stack: List[Api]): - api, providers = kv - visited.add(api) + def dfs(kv, visited: Set[str], stack: List[str]): + api_str, providers = kv + visited.add(api_str) - deps = [dep for x in providers for dep in x.api_dependencies] - for api in deps: - if api not in visited: - dfs((api, providers_with_specs[api]), visited, stack) + deps = [] + for provider in providers: + for dep in provider.spec.api_dependencies: + deps.append(dep.value) + if isinstance(provider, AutoRoutedProviderSpec): + deps.append(f"inner-{provider.api}") - stack.append(api) + for dep in deps: + if dep not in visited: + dfs((dep, providers_with_specs[dep]), visited, stack) + + stack.append(api_str) visited = set() stack = [] - for api, providers in providers_with_specs.items(): - if api not in visited: - dfs((api, providers), visited, stack) + for api_str, providers in providers_with_specs.items(): + if api_str not in visited: + dfs((api_str, providers), visited, stack) flattened = [] - for api in stack: - flattened.extend(providers_with_specs[api]) + for api_str in stack: + for provider in providers_with_specs[api_str]: + flattened.append((api_str, provider)) return flattened @@ -202,9 +209,8 @@ async def instantiate_provider( else: method = "get_client_impl" - assert isinstance(provider_config, GenericProviderConfig) config_type = instantiate_class_type(provider_spec.config_class) - config = config_type(**provider_config.config) + config = config_type(**provider.config) args = [config, deps] elif isinstance(provider_spec, AutoRoutedProviderSpec): method = "get_auto_router_impl" @@ -214,17 +220,13 @@ async def instantiate_provider( elif isinstance(provider_spec, RoutingTableProviderSpec): method = "get_routing_table_impl" - assert isinstance(provider_config, list) - registry = provider_config - config = None - args = [provider_spec.api, registry, inner_impls, deps] + args = [provider_spec.api, provider_spec.registry, inner_impls, deps] else: method = "get_provider_impl" - assert isinstance(provider_config, GenericProviderConfig) config_type = instantiate_class_type(provider_spec.config_class) - config = config_type(**provider_config.config) + config = config_type(**provider.config) args = [config, deps] fn = getattr(module, method) diff --git a/llama_stack/distribution/routers/__init__.py b/llama_stack/distribution/routers/__init__.py index 0464ab57a..9935ecd7d 100644 --- a/llama_stack/distribution/routers/__init__.py +++ b/llama_stack/distribution/routers/__init__.py @@ -10,8 +10,6 @@ from llama_stack.distribution.datatypes import * # noqa: F403 from .routing_tables import ( MemoryBanksRoutingTable, 
ModelsRoutingTable, - RoutableObject, - RoutedProtocol, ShieldsRoutingTable, ) diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index 01d92ff12..fbc3eae32 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -4,33 +4,17 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Any, List, Optional, Union +from typing import Any, List, Optional from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_stack.apis.models import * # noqa: F403 from llama_stack.apis.shields import * # noqa: F403 from llama_stack.apis.memory_banks import * # noqa: F403 -from llama_stack.apis.inference import Inference -from llama_stack.apis.memory import Memory -from llama_stack.apis.safety import Safety from llama_stack.distribution.datatypes import * # noqa: F403 -RoutableObject = Union[ - ModelDef, - ShieldDef, - MemoryBankDef, -] - -RoutedProtocol = Union[ - Inference, - Safety, - Memory, -] - - class CommonRoutingTableImpl(RoutingTable): def __init__( self, @@ -46,19 +30,14 @@ class CommonRoutingTableImpl(RoutingTable): self.impls_by_provider_id = impls_by_provider_id self.registry = registry - async def initialize(self) -> None: - keys_by_provider = {} + self.routing_key_to_object = {} for obj in self.registry: - keys = keys_by_provider.setdefault(obj.provider_id, []) - keys.append(obj.routing_key) + self.routing_key_to_object[obj.identifier] = obj - for provider_id, keys in keys_by_provider.items(): - p = self.impls_by_provider_id[provider_id] - spec = p.__provider_spec__ - if is_passthrough(spec): - continue - - await p.validate_routing_keys(keys) + async def initialize(self) -> None: + for obj in self.registry: + p = self.impls_by_provider_id[obj.provider_id] + await self.register_object(obj, p) async def shutdown(self) -> None: pass @@ -75,8 +54,24 @@ class CommonRoutingTableImpl(RoutingTable): return obj return None + def register_object(self, obj: RoutableObject) -> None: + if obj.identifier in self.routing_key_to_object: + raise ValueError(f"Object `{obj.identifier}` already registered") + + if obj.provider_id not in self.impls_by_provider_id: + raise ValueError(f"Provider `{obj.provider_id}` not found") + + p = self.impls_by_provider_id[obj.provider_id] + await p.register_object(obj) + + self.routing_key_to_object[obj.identifier] = obj + self.registry.append(obj) + class ModelsRoutingTable(CommonRoutingTableImpl, Models): + async def register_object(self, obj: ModelDef, p: Inference) -> None: + await p.register_model(obj) + async def list_models(self) -> List[ModelDef]: return self.registry @@ -84,10 +79,13 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models): return self.get_object_by_identifier(identifier) async def register_model(self, model: ModelDef) -> None: - raise NotImplementedError() + await self.register_object(model) class ShieldsRoutingTable(CommonRoutingTableImpl, Shields): + async def register_object(self, obj: ShieldDef, p: Safety) -> None: + await p.register_shield(obj) + async def list_shields(self) -> List[ShieldDef]: return self.registry @@ -95,10 +93,13 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields): return self.get_object_by_identifier(shield_type) async def register_shield(self, shield: ShieldDef) -> None: - raise NotImplementedError() + await self.register_object(shield) class 
MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks): + async def register_object(self, obj: MemoryBankDef, p: Memory) -> None: + await p.register_memory_bank(obj) + async def list_memory_banks(self) -> List[MemoryBankDef]: return self.registry @@ -106,4 +107,4 @@ class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks): return self.get_object_by_identifier(identifier) async def register_memory_bank(self, bank: MemoryBankDef) -> None: - raise NotImplementedError() + await self.register_object(bank) diff --git a/llama_stack/providers/adapters/safety/together/together.py b/llama_stack/providers/adapters/safety/together/together.py index c7a667e01..9d9fa6a4e 100644 --- a/llama_stack/providers/adapters/safety/together/together.py +++ b/llama_stack/providers/adapters/safety/together/together.py @@ -6,28 +6,23 @@ from together import Together from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_stack.apis.safety import ( - RunShieldResponse, - Safety, - SafetyViolation, - ViolationLevel, -) -from llama_stack.distribution.datatypes import RoutableProvider +from llama_stack.apis.safety import * # noqa: F403 from llama_stack.distribution.request_headers import NeedsRequestProviderData from .config import TogetherSafetyConfig -SAFETY_SHIELD_TYPES = { +SAFETY_SHIELD_MODEL_MAP = { "llama_guard": "meta-llama/Meta-Llama-Guard-3-8B", "Llama-Guard-3-8B": "meta-llama/Meta-Llama-Guard-3-8B", "Llama-Guard-3-11B-Vision": "meta-llama/Llama-Guard-3-11B-Vision-Turbo", } -class TogetherSafetyImpl(Safety, NeedsRequestProviderData, RoutableProvider): +class TogetherSafetyImpl(Safety, NeedsRequestProviderData): def __init__(self, config: TogetherSafetyConfig) -> None: self.config = config + self.register_shields = [] async def initialize(self) -> None: pass @@ -35,16 +30,31 @@ class TogetherSafetyImpl(Safety, NeedsRequestProviderData, RoutableProvider): async def shutdown(self) -> None: pass - async def validate_routing_keys(self, routing_keys: List[str]) -> None: - for key in routing_keys: - if key not in SAFETY_SHIELD_TYPES: - raise ValueError(f"Unknown safety shield type: {key}") + async def register_shield(self, shield: ShieldDef) -> None: + if shield.type != ShieldType.llama_guard.value: + raise ValueError(f"Unsupported safety shield type: {shield.type}") + + self.registered_shields.append(shield) + + async def list_shields(self) -> List[ShieldDef]: + return self.registered_shields + + async def get_shield(self, identifier: str) -> Optional[ShieldDef]: + for shield in self.registered_shields: + if shield.identifier == identifier: + return shield + return None async def run_shield( self, shield_type: str, messages: List[Message], params: Dict[str, Any] = None ) -> RunShieldResponse: - if shield_type not in SAFETY_SHIELD_TYPES: - raise ValueError(f"Unknown safety shield type: {shield_type}") + shield_def = await self.get_shield(shield_type) + if not shield_def: + raise ValueError(f"Unknown shield {shield_type}") + + model = shield_def.params.get("model", "llama_guard") + if model not in SAFETY_SHIELD_MODEL_MAP: + raise ValueError(f"Unsupported safety model: {model}") together_api_key = None if self.config.api_key is not None: @@ -57,17 +67,13 @@ class TogetherSafetyImpl(Safety, NeedsRequestProviderData, RoutableProvider): ) together_api_key = provider_data.together_api_key - model_name = SAFETY_SHIELD_TYPES[shield_type] - # messages can have role assistant or user api_messages = [] for message in messages: if message.role in (Role.user.value, Role.assistant.value): 
api_messages.append({"role": message.role, "content": message.content}) - violation = await get_safety_response( - together_api_key, model_name, api_messages - ) + violation = await get_safety_response(together_api_key, model, api_messages) return RunShieldResponse(violation=violation) diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py index abc1d601d..a254e2808 100644 --- a/llama_stack/providers/datatypes.py +++ b/llama_stack/providers/datatypes.py @@ -48,16 +48,6 @@ class RoutingTable(Protocol): def get_provider_impl(self, routing_key: str) -> Any: ... -class RoutableProvider(Protocol): - """ - A provider which sits behind the RoutingTable and can get routed to. - - All Inference / Safety / Memory providers fall into this bucket. - """ - - async def validate_routing_keys(self, keys: List[str]) -> None: ... - - @json_schema_type class AdapterSpec(BaseModel): adapter_type: str = Field( diff --git a/llama_stack/providers/impls/meta_reference/memory/faiss.py b/llama_stack/providers/impls/meta_reference/memory/faiss.py index b9a00908e..4f592e5e0 100644 --- a/llama_stack/providers/impls/meta_reference/memory/faiss.py +++ b/llama_stack/providers/impls/meta_reference/memory/faiss.py @@ -5,7 +5,6 @@ # the root directory of this source tree. import logging -import uuid from typing import Any, Dict, List, Optional @@ -72,38 +71,29 @@ class FaissMemoryImpl(Memory, RoutableProvider): async def shutdown(self) -> None: ... - async def validate_routing_keys(self, routing_keys: List[str]) -> None: - print(f"[faiss] Registering memory bank routing keys: {routing_keys}") - pass - - async def create_memory_bank( + async def register_memory_bank( self, - name: str, - config: MemoryBankConfig, - url: Optional[URL] = None, - ) -> MemoryBank: - assert url is None, "URL is not supported for this implementation" + memory_bank: MemoryBankDef, + ) -> None: assert ( - config.type == MemoryBankType.vector.value - ), f"Only vector banks are supported {config.type}" + memory_bank.type == MemoryBankType.vector.value + ), f"Only vector banks are supported {memory_bank.type}" - bank_id = str(uuid.uuid4()) - bank = MemoryBank( - bank_id=bank_id, - name=name, - config=config, - url=url, + index = BankWithIndex( + bank=memory_bank, index=FaissIndex(ALL_MINILM_L6_V2_DIMENSION) ) - index = BankWithIndex(bank=bank, index=FaissIndex(ALL_MINILM_L6_V2_DIMENSION)) - self.cache[bank_id] = index + self.cache[memory_bank.identifier] = index return bank - async def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]: - index = self.cache.get(bank_id) + async def get_memory_bank(self, identifier: str) -> Optional[MemoryBankDef]: + index = self.cache.get(identifier) if index is None: return None return index.bank + async def list_memory_banks(self) -> List[MemoryBankDef]: + return [x.bank for x in self.cache.values()] + async def insert_documents( self, bank_id: str, diff --git a/llama_stack/providers/impls/meta_reference/safety/config.py b/llama_stack/providers/impls/meta_reference/safety/config.py index 51d2ae2bf..14233ad0c 100644 --- a/llama_stack/providers/impls/meta_reference/safety/config.py +++ b/llama_stack/providers/impls/meta_reference/safety/config.py @@ -12,11 +12,9 @@ from llama_models.sku_list import CoreModelId, safety_models from pydantic import BaseModel, field_validator -class MetaReferenceShieldType(Enum): - llama_guard = "llama_guard" - code_scanner_guard = "code_scanner_guard" - injection_shield = "injection_shield" - jailbreak_shield = "jailbreak_shield" +class 
PromptGuardType(Enum): + injection = "injection" + jailbreak = "jailbreak" class LlamaGuardShieldConfig(BaseModel): diff --git a/llama_stack/providers/impls/meta_reference/safety/safety.py b/llama_stack/providers/impls/meta_reference/safety/safety.py index bf19a3010..5154acd77 100644 --- a/llama_stack/providers/impls/meta_reference/safety/safety.py +++ b/llama_stack/providers/impls/meta_reference/safety/safety.py @@ -10,23 +10,36 @@ from llama_stack.distribution.utils.model_utils import model_local_dir from llama_stack.apis.inference import * # noqa: F403 from llama_stack.apis.safety import * # noqa: F403 from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_stack.distribution.datatypes import Api, RoutableProvider +from llama_stack.distribution.datatypes import Api from llama_stack.providers.impls.meta_reference.safety.shields.base import ( OnViolationAction, ) -from .config import MetaReferenceShieldType, SafetyConfig +from .config import SafetyConfig -from .shields import CodeScannerShield, LlamaGuardShield, ShieldBase +from .shields import ( + CodeScannerShield, + InjectionShield, + JailbreakShield, + LlamaGuardShield, + ShieldBase, +) PROMPT_GUARD_MODEL = "Prompt-Guard-86M" -class MetaReferenceSafetyImpl(Safety, RoutableProvider): +class MetaReferenceSafetyImpl(Safety): def __init__(self, config: SafetyConfig, deps) -> None: self.config = config self.inference_api = deps[Api.inference] + self.registered_shields = [] + + self.available_shields = [ShieldType.code_scanner.value] + if config.llama_guard_shield: + self.available_shields.append(ShieldType.llama_guard.value) + if config.enable_prompt_guard: + self.available_shields.append(ShieldType.prompt_guard.value) async def initialize(self) -> None: if self.config.enable_prompt_guard: @@ -38,11 +51,20 @@ class MetaReferenceSafetyImpl(Safety, RoutableProvider): async def shutdown(self) -> None: pass - async def validate_routing_keys(self, routing_keys: List[str]) -> None: - available_shields = [v.value for v in MetaReferenceShieldType] - for key in routing_keys: - if key not in available_shields: - raise ValueError(f"Unknown safety shield type: {key}") + async def register_shield(self, shield: ShieldDef) -> None: + if shield.type not in self.available_shields: + raise ValueError(f"Unsupported safety shield type: {shield.type}") + + self.registered_shields.append(shield) + + async def list_shields(self) -> List[ShieldDef]: + return self.registered_shields + + async def get_shield(self, identifier: str) -> Optional[ShieldDef]: + for shield in self.registered_shields: + if shield.identifier == identifier: + return shield + return None async def run_shield( self, @@ -50,10 +72,11 @@ class MetaReferenceSafetyImpl(Safety, RoutableProvider): messages: List[Message], params: Dict[str, Any] = None, ) -> RunShieldResponse: - available_shields = [v.value for v in MetaReferenceShieldType] - assert shield_type in available_shields, f"Unknown shield {shield_type}" + shield_def = await self.get_shield(shield_type) + if not shield_def: + raise ValueError(f"Unknown shield {shield_type}") - shield = self.get_shield_impl(MetaReferenceShieldType(shield_type)) + shield = self.get_shield_impl(shield_def) messages = messages.copy() # some shields like llama-guard require the first message to be a user message @@ -79,30 +102,24 @@ class MetaReferenceSafetyImpl(Safety, RoutableProvider): return RunShieldResponse(violation=violation) - def get_shield_impl(self, typ: MetaReferenceShieldType) -> ShieldBase: - cfg = self.config - if typ == 
MetaReferenceShieldType.llama_guard: - cfg = cfg.llama_guard_shield - assert ( - cfg is not None - ), "Cannot use LlamaGuardShield since not present in config" - + def get_shield_impl(self, shield: ShieldDef) -> ShieldBase: + if shield.type == ShieldType.llama_guard.value: + cfg = self.config.llama_guard_shield return LlamaGuardShield( model=cfg.model, inference_api=self.inference_api, excluded_categories=cfg.excluded_categories, ) - elif typ == MetaReferenceShieldType.jailbreak_shield: - from .shields import JailbreakShield - + elif shield.type == ShieldType.prompt_guard.value: model_dir = model_local_dir(PROMPT_GUARD_MODEL) - return JailbreakShield.instance(model_dir) - elif typ == MetaReferenceShieldType.injection_shield: - from .shields import InjectionShield - - model_dir = model_local_dir(PROMPT_GUARD_MODEL) - return InjectionShield.instance(model_dir) - elif typ == MetaReferenceShieldType.code_scanner_guard: + subtype = shield.params.get("prompt_guard_type", "injection") + if subtype == "injection": + return InjectionShield.instance(model_dir) + elif subtype == "jailbreak": + return JailbreakShield.instance(model_dir) + else: + raise ValueError(f"Unknown prompt guard type: {subtype}") + elif shield.type == ShieldType.code_scanner.value: return CodeScannerShield.instance() else: - raise ValueError(f"Unknown shield type: {typ}") + raise ValueError(f"Unknown shield type: {shield.type}") diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index 1683ddaa1..0540cdf60 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -146,7 +146,7 @@ class EmbeddingIndex(ABC): @dataclass class BankWithIndex: - bank: MemoryBank + bank: MemoryBankDef index: EmbeddingIndex async def insert_documents( From 59302a86dfe1b140b79b0bb1851d2ceb85aeb6b9 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sat, 5 Oct 2024 22:25:48 -0700 Subject: [PATCH 37/69] inference registry updates --- llama_stack/distribution/inspect.py | 9 +- llama_stack/distribution/resolver.py | 3 +- llama_stack/distribution/routers/routers.py | 55 +- .../distribution/routers/routing_tables.py | 10 +- .../adapters/inference/bedrock/bedrock.py | 890 +++++++++--------- .../adapters/inference/fireworks/fireworks.py | 6 +- .../adapters/inference/ollama/ollama.py | 7 +- .../adapters/inference/together/together.py | 6 +- .../meta_reference/inference/inference.py | 28 +- .../impls/meta_reference/memory/faiss.py | 4 +- .../utils/inference/model_registry.py | 51 + .../providers/utils/inference/routable.py | 36 - 12 files changed, 570 insertions(+), 535 deletions(-) create mode 100644 llama_stack/providers/utils/inference/model_registry.py delete mode 100644 llama_stack/providers/utils/inference/routable.py diff --git a/llama_stack/distribution/inspect.py b/llama_stack/distribution/inspect.py index 07a851e78..9963fffd8 100644 --- a/llama_stack/distribution/inspect.py +++ b/llama_stack/distribution/inspect.py @@ -17,14 +17,19 @@ class DistributionInspectConfig(BaseModel): pass -def get_provider_impl(*args, **kwargs): - return DistributionInspectImpl() +async def get_provider_impl(*args, **kwargs): + impl = DistributionInspectImpl() + await impl.initialize() + return impl class DistributionInspectImpl(Inspect): def __init__(self): pass + async def initialize(self) -> None: + pass + async def list_providers(self) -> Dict[str, List[ProviderInfo]]: ret = {} all_providers = get_provider_registry() diff --git 
a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index 660d84fc8..2c383587c 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -20,6 +20,7 @@ class ProviderWithSpec(Provider): spec: ProviderSpec +# TODO: this code is not very straightforward to follow and needs one more round of refactoring async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, Any]: """ Does two things: @@ -134,7 +135,7 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An print("") impls = {} - inner_impls_by_provider_id = {f"inner-{x}": {} for x in router_apis} + inner_impls_by_provider_id = {f"inner-{x.value}": {} for x in router_apis} for api_str, provider in sorted_providers: deps = {a: impls[a] for a in provider.spec.api_dependencies} diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py index c360bcfb0..c56b33f21 100644 --- a/llama_stack/distribution/routers/routers.py +++ b/llama_stack/distribution/routers/routers.py @@ -14,14 +14,13 @@ from llama_stack.apis.safety import * # noqa: F403 class MemoryRouter(Memory): - """Routes to an provider based on the memory bank type""" + """Routes to an provider based on the memory bank identifier""" def __init__( self, routing_table: RoutingTable, ) -> None: self.routing_table = routing_table - self.bank_id_to_type = {} async def initialize(self) -> None: pass @@ -29,32 +28,14 @@ class MemoryRouter(Memory): async def shutdown(self) -> None: pass - def get_provider_from_bank_id(self, bank_id: str) -> Any: - bank_type = self.bank_id_to_type.get(bank_id) - if not bank_type: - raise ValueError(f"Could not find bank type for {bank_id}") + async def list_memory_banks(self) -> List[MemoryBankDef]: + return self.routing_table.list_memory_banks() - provider = self.routing_table.get_provider_impl(bank_type) - if not provider: - raise ValueError(f"Could not find provider for {bank_type}") - return provider + async def get_memory_bank(self, identifier: str) -> Optional[MemoryBankDef]: + return self.routing_table.get_memory_bank(identifier) - async def create_memory_bank( - self, - name: str, - config: MemoryBankConfig, - url: Optional[URL] = None, - ) -> MemoryBank: - bank_type = config.type - bank = await self.routing_table.get_provider_impl(bank_type).create_memory_bank( - name, config, url - ) - self.bank_id_to_type[bank.bank_id] = bank_type - return bank - - async def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]: - provider = self.get_provider_from_bank_id(bank_id) - return await provider.get_memory_bank(bank_id) + async def register_memory_bank(self, bank: MemoryBankDef) -> None: + await self.routing_table.register_memory_bank(bank) async def insert_documents( self, @@ -62,7 +43,7 @@ class MemoryRouter(Memory): documents: List[MemoryBankDocument], ttl_seconds: Optional[int] = None, ) -> None: - return await self.get_provider_from_bank_id(bank_id).insert_documents( + return await self.routing_table.get_provider_impl(bank_id).insert_documents( bank_id, documents, ttl_seconds ) @@ -72,7 +53,7 @@ class MemoryRouter(Memory): query: InterleavedTextMedia, params: Optional[Dict[str, Any]] = None, ) -> QueryDocumentsResponse: - return await self.get_provider_from_bank_id(bank_id).query_documents( + return await self.routing_table.get_provider_impl(bank_id).query_documents( bank_id, query, params ) @@ -92,6 +73,15 @@ class InferenceRouter(Inference): async def shutdown(self) -> None: pass + async def 
list_models(self) -> List[ModelDef]: + return self.routing_table.list_models() + + async def get_model(self, identifier: str) -> Optional[ModelDef]: + return self.routing_table.get_model(identifier) + + async def register_model(self, model: ModelDef) -> None: + await self.routing_table.register_model(model) + async def chat_completion( self, model: str, @@ -159,6 +149,15 @@ class SafetyRouter(Safety): async def shutdown(self) -> None: pass + async def list_shields(self) -> List[ShieldDef]: + return self.routing_table.list_shields() + + async def get_shield(self, shield_type: str) -> Optional[ShieldDef]: + return self.routing_table.get_shield(shield_type) + + async def register_shield(self, shield: ShieldDef) -> None: + await self.routing_table.register_shield(shield) + async def run_shield( self, shield_type: str, diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index fbc3eae32..350ab05fe 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -15,6 +15,8 @@ from llama_stack.apis.memory_banks import * # noqa: F403 from llama_stack.distribution.datatypes import * # noqa: F403 +# TODO: this routing table maintains state in memory purely. We need to +# add persistence to it when we add dynamic registration of objects. class CommonRoutingTableImpl(RoutingTable): def __init__( self, @@ -54,7 +56,7 @@ class CommonRoutingTableImpl(RoutingTable): return obj return None - def register_object(self, obj: RoutableObject) -> None: + async def register_object_common(self, obj: RoutableObject) -> None: if obj.identifier in self.routing_key_to_object: raise ValueError(f"Object `{obj.identifier}` already registered") @@ -79,7 +81,7 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models): return self.get_object_by_identifier(identifier) async def register_model(self, model: ModelDef) -> None: - await self.register_object(model) + await self.register_object_common(model) class ShieldsRoutingTable(CommonRoutingTableImpl, Shields): @@ -93,7 +95,7 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields): return self.get_object_by_identifier(shield_type) async def register_shield(self, shield: ShieldDef) -> None: - await self.register_object(shield) + await self.register_object_common(shield) class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks): @@ -107,4 +109,4 @@ class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks): return self.get_object_by_identifier(identifier) async def register_memory_bank(self, bank: MemoryBankDef) -> None: - await self.register_object(bank) + await self.register_object_common(bank) diff --git a/llama_stack/providers/adapters/inference/bedrock/bedrock.py b/llama_stack/providers/adapters/inference/bedrock/bedrock.py index 9c1db4bdb..7f51894bc 100644 --- a/llama_stack/providers/adapters/inference/bedrock/bedrock.py +++ b/llama_stack/providers/adapters/inference/bedrock/bedrock.py @@ -1,445 +1,445 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from typing import * # noqa: F403 - -import boto3 -from botocore.client import BaseClient -from botocore.config import Config - -from llama_models.llama3.api.chat_format import ChatFormat -from llama_models.llama3.api.tokenizer import Tokenizer - -from llama_stack.providers.utils.inference.routable import RoutableProviderForModels - -from llama_stack.apis.inference import * # noqa: F403 -from llama_stack.providers.adapters.inference.bedrock.config import BedrockConfig - - -BEDROCK_SUPPORTED_MODELS = { - "Llama3.1-8B-Instruct": "meta.llama3-1-8b-instruct-v1:0", - "Llama3.1-70B-Instruct": "meta.llama3-1-70b-instruct-v1:0", - "Llama3.1-405B-Instruct": "meta.llama3-1-405b-instruct-v1:0", -} - - -class BedrockInferenceAdapter(Inference, RoutableProviderForModels): - - @staticmethod - def _create_bedrock_client(config: BedrockConfig) -> BaseClient: - retries_config = { - k: v - for k, v in dict( - total_max_attempts=config.total_max_attempts, - mode=config.retry_mode, - ).items() - if v is not None - } - - config_args = { - k: v - for k, v in dict( - region_name=config.region_name, - retries=retries_config if retries_config else None, - connect_timeout=config.connect_timeout, - read_timeout=config.read_timeout, - ).items() - if v is not None - } - - boto3_config = Config(**config_args) - - session_args = { - k: v - for k, v in dict( - aws_access_key_id=config.aws_access_key_id, - aws_secret_access_key=config.aws_secret_access_key, - aws_session_token=config.aws_session_token, - region_name=config.region_name, - profile_name=config.profile_name, - ).items() - if v is not None - } - - boto3_session = boto3.session.Session(**session_args) - - return boto3_session.client("bedrock-runtime", config=boto3_config) - - def __init__(self, config: BedrockConfig) -> None: - RoutableProviderForModels.__init__( - self, stack_to_provider_models_map=BEDROCK_SUPPORTED_MODELS - ) - self._config = config - - self._client = BedrockInferenceAdapter._create_bedrock_client(config) - tokenizer = Tokenizer.get_instance() - self.formatter = ChatFormat(tokenizer) - - @property - def client(self) -> BaseClient: - return self._client - - async def initialize(self) -> None: - pass - - async def shutdown(self) -> None: - self.client.close() - - async def completion( - self, - model: str, - content: InterleavedTextMedia, - sampling_params: Optional[SamplingParams] = SamplingParams(), - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: - raise NotImplementedError() - - @staticmethod - def _bedrock_stop_reason_to_stop_reason(bedrock_stop_reason: str) -> StopReason: - if bedrock_stop_reason == "max_tokens": - return StopReason.out_of_tokens - return StopReason.end_of_turn - - @staticmethod - def _builtin_tool_name_to_enum(tool_name_str: str) -> Union[BuiltinTool, str]: - for builtin_tool in BuiltinTool: - if builtin_tool.value == tool_name_str: - return builtin_tool - else: - return tool_name_str - - @staticmethod - def _bedrock_message_to_message(converse_api_res: Dict) -> Message: - stop_reason = BedrockInferenceAdapter._bedrock_stop_reason_to_stop_reason( - converse_api_res["stopReason"] - ) - - bedrock_message = converse_api_res["output"]["message"] - - role = bedrock_message["role"] - contents = bedrock_message["content"] - - tool_calls = [] - text_content = [] - for content in contents: - if "toolUse" in content: - tool_use = content["toolUse"] - tool_calls.append( - ToolCall( - 
tool_name=BedrockInferenceAdapter._builtin_tool_name_to_enum( - tool_use["name"] - ), - arguments=tool_use["input"] if "input" in tool_use else None, - call_id=tool_use["toolUseId"], - ) - ) - elif "text" in content: - text_content.append(content["text"]) - - return CompletionMessage( - role=role, - content=text_content, - stop_reason=stop_reason, - tool_calls=tool_calls, - ) - - @staticmethod - def _messages_to_bedrock_messages( - messages: List[Message], - ) -> Tuple[List[Dict], Optional[List[Dict]]]: - bedrock_messages = [] - system_bedrock_messages = [] - - user_contents = [] - assistant_contents = None - for message in messages: - role = message.role - content_list = ( - message.content - if isinstance(message.content, list) - else [message.content] - ) - if role == "ipython" or role == "user": - if not user_contents: - user_contents = [] - - if role == "ipython": - user_contents.extend( - [ - { - "toolResult": { - "toolUseId": message.call_id, - "content": [ - {"text": content} for content in content_list - ], - } - } - ] - ) - else: - user_contents.extend( - [{"text": content} for content in content_list] - ) - - if assistant_contents: - bedrock_messages.append( - {"role": "assistant", "content": assistant_contents} - ) - assistant_contents = None - elif role == "system": - system_bedrock_messages.extend( - [{"text": content} for content in content_list] - ) - elif role == "assistant": - if not assistant_contents: - assistant_contents = [] - - assistant_contents.extend( - [ - { - "text": content, - } - for content in content_list - ] - + [ - { - "toolUse": { - "input": tool_call.arguments, - "name": ( - tool_call.tool_name - if isinstance(tool_call.tool_name, str) - else tool_call.tool_name.value - ), - "toolUseId": tool_call.call_id, - } - } - for tool_call in message.tool_calls - ] - ) - - if user_contents: - bedrock_messages.append({"role": "user", "content": user_contents}) - user_contents = None - else: - # Unknown role - pass - - if user_contents: - bedrock_messages.append({"role": "user", "content": user_contents}) - if assistant_contents: - bedrock_messages.append( - {"role": "assistant", "content": assistant_contents} - ) - - if system_bedrock_messages: - return bedrock_messages, system_bedrock_messages - - return bedrock_messages, None - - @staticmethod - def get_bedrock_inference_config(sampling_params: Optional[SamplingParams]) -> Dict: - inference_config = {} - if sampling_params: - param_mapping = { - "max_tokens": "maxTokens", - "temperature": "temperature", - "top_p": "topP", - } - - for k, v in param_mapping.items(): - if getattr(sampling_params, k): - inference_config[v] = getattr(sampling_params, k) - - return inference_config - - @staticmethod - def _tool_parameters_to_input_schema( - tool_parameters: Optional[Dict[str, ToolParamDefinition]] - ) -> Dict: - input_schema = {"type": "object"} - if not tool_parameters: - return input_schema - - json_properties = {} - required = [] - for name, param in tool_parameters.items(): - json_property = { - "type": param.param_type, - } - - if param.description: - json_property["description"] = param.description - if param.required: - required.append(name) - json_properties[name] = json_property - - input_schema["properties"] = json_properties - if required: - input_schema["required"] = required - return input_schema - - @staticmethod - def _tools_to_tool_config( - tools: Optional[List[ToolDefinition]], tool_choice: Optional[ToolChoice] - ) -> Optional[Dict]: - if not tools: - return None - - bedrock_tools = [] - for tool in 
tools: - tool_name = ( - tool.tool_name - if isinstance(tool.tool_name, str) - else tool.tool_name.value - ) - - tool_spec = { - "toolSpec": { - "name": tool_name, - "inputSchema": { - "json": BedrockInferenceAdapter._tool_parameters_to_input_schema( - tool.parameters - ), - }, - } - } - - if tool.description: - tool_spec["toolSpec"]["description"] = tool.description - - bedrock_tools.append(tool_spec) - tool_config = { - "tools": bedrock_tools, - } - - if tool_choice: - tool_config["toolChoice"] = ( - {"any": {}} - if tool_choice.value == ToolChoice.required - else {"auto": {}} - ) - return tool_config - - async def chat_completion( - self, - model: str, - messages: List[Message], - sampling_params: Optional[SamplingParams] = SamplingParams(), - # zero-shot tool definitions as input to the model - tools: Optional[List[ToolDefinition]] = None, - tool_choice: Optional[ToolChoice] = ToolChoice.auto, - tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json, - stream: Optional[bool] = False, - logprobs: Optional[LogProbConfig] = None, - ) -> ( - AsyncGenerator - ): # Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: - bedrock_model = self.map_to_provider_model(model) - inference_config = BedrockInferenceAdapter.get_bedrock_inference_config( - sampling_params - ) - - tool_config = BedrockInferenceAdapter._tools_to_tool_config(tools, tool_choice) - bedrock_messages, system_bedrock_messages = ( - BedrockInferenceAdapter._messages_to_bedrock_messages(messages) - ) - - converse_api_params = { - "modelId": bedrock_model, - "messages": bedrock_messages, - } - if inference_config: - converse_api_params["inferenceConfig"] = inference_config - - # Tool use is not supported in streaming mode - if tool_config and not stream: - converse_api_params["toolConfig"] = tool_config - if system_bedrock_messages: - converse_api_params["system"] = system_bedrock_messages - - if not stream: - converse_api_res = self.client.converse(**converse_api_params) - - output_message = BedrockInferenceAdapter._bedrock_message_to_message( - converse_api_res - ) - - yield ChatCompletionResponse( - completion_message=output_message, - logprobs=None, - ) - else: - converse_stream_api_res = self.client.converse_stream(**converse_api_params) - event_stream = converse_stream_api_res["stream"] - - for chunk in event_stream: - if "messageStart" in chunk: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.start, - delta="", - ) - ) - elif "contentBlockStart" in chunk: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content=ToolCall( - tool_name=chunk["contentBlockStart"]["toolUse"][ - "name" - ], - call_id=chunk["contentBlockStart"]["toolUse"][ - "toolUseId" - ], - ), - parse_status=ToolCallParseStatus.started, - ), - ) - ) - elif "contentBlockDelta" in chunk: - if "text" in chunk["contentBlockDelta"]["delta"]: - delta = chunk["contentBlockDelta"]["delta"]["text"] - else: - delta = ToolCallDelta( - content=ToolCall( - arguments=chunk["contentBlockDelta"]["delta"][ - "toolUse" - ]["input"] - ), - parse_status=ToolCallParseStatus.success, - ) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=delta, - ) - ) - elif "contentBlockStop" in chunk: - # Ignored - pass - elif "messageStop" in chunk: - stop_reason = ( - 
BedrockInferenceAdapter._bedrock_stop_reason_to_stop_reason( - chunk["messageStop"]["stopReason"] - ) - ) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.complete, - delta="", - stop_reason=stop_reason, - ) - ) - elif "metadata" in chunk: - # Ignored - pass - else: - # Ignored - pass +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import * # noqa: F403 + +import boto3 +from botocore.client import BaseClient +from botocore.config import Config + +from llama_models.llama3.api.chat_format import ChatFormat +from llama_models.llama3.api.tokenizer import Tokenizer + +from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper + +from llama_stack.apis.inference import * # noqa: F403 +from llama_stack.providers.adapters.inference.bedrock.config import BedrockConfig + + +BEDROCK_SUPPORTED_MODELS = { + "Llama3.1-8B-Instruct": "meta.llama3-1-8b-instruct-v1:0", + "Llama3.1-70B-Instruct": "meta.llama3-1-70b-instruct-v1:0", + "Llama3.1-405B-Instruct": "meta.llama3-1-405b-instruct-v1:0", +} + + +class BedrockInferenceAdapter(ModelRegistryHelper, Inference): + + @staticmethod + def _create_bedrock_client(config: BedrockConfig) -> BaseClient: + retries_config = { + k: v + for k, v in dict( + total_max_attempts=config.total_max_attempts, + mode=config.retry_mode, + ).items() + if v is not None + } + + config_args = { + k: v + for k, v in dict( + region_name=config.region_name, + retries=retries_config if retries_config else None, + connect_timeout=config.connect_timeout, + read_timeout=config.read_timeout, + ).items() + if v is not None + } + + boto3_config = Config(**config_args) + + session_args = { + k: v + for k, v in dict( + aws_access_key_id=config.aws_access_key_id, + aws_secret_access_key=config.aws_secret_access_key, + aws_session_token=config.aws_session_token, + region_name=config.region_name, + profile_name=config.profile_name, + ).items() + if v is not None + } + + boto3_session = boto3.session.Session(**session_args) + + return boto3_session.client("bedrock-runtime", config=boto3_config) + + def __init__(self, config: BedrockConfig) -> None: + ModelRegistryHelper.__init__( + self, stack_to_provider_models_map=BEDROCK_SUPPORTED_MODELS + ) + self._config = config + + self._client = BedrockInferenceAdapter._create_bedrock_client(config) + tokenizer = Tokenizer.get_instance() + self.formatter = ChatFormat(tokenizer) + + @property + def client(self) -> BaseClient: + return self._client + + async def initialize(self) -> None: + pass + + async def shutdown(self) -> None: + self.client.close() + + async def completion( + self, + model: str, + content: InterleavedTextMedia, + sampling_params: Optional[SamplingParams] = SamplingParams(), + stream: Optional[bool] = False, + logprobs: Optional[LogProbConfig] = None, + ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: + raise NotImplementedError() + + @staticmethod + def _bedrock_stop_reason_to_stop_reason(bedrock_stop_reason: str) -> StopReason: + if bedrock_stop_reason == "max_tokens": + return StopReason.out_of_tokens + return StopReason.end_of_turn + + @staticmethod + def _builtin_tool_name_to_enum(tool_name_str: str) -> Union[BuiltinTool, str]: + for builtin_tool in BuiltinTool: + if builtin_tool.value == tool_name_str: + return builtin_tool + else: + return 
tool_name_str + + @staticmethod + def _bedrock_message_to_message(converse_api_res: Dict) -> Message: + stop_reason = BedrockInferenceAdapter._bedrock_stop_reason_to_stop_reason( + converse_api_res["stopReason"] + ) + + bedrock_message = converse_api_res["output"]["message"] + + role = bedrock_message["role"] + contents = bedrock_message["content"] + + tool_calls = [] + text_content = [] + for content in contents: + if "toolUse" in content: + tool_use = content["toolUse"] + tool_calls.append( + ToolCall( + tool_name=BedrockInferenceAdapter._builtin_tool_name_to_enum( + tool_use["name"] + ), + arguments=tool_use["input"] if "input" in tool_use else None, + call_id=tool_use["toolUseId"], + ) + ) + elif "text" in content: + text_content.append(content["text"]) + + return CompletionMessage( + role=role, + content=text_content, + stop_reason=stop_reason, + tool_calls=tool_calls, + ) + + @staticmethod + def _messages_to_bedrock_messages( + messages: List[Message], + ) -> Tuple[List[Dict], Optional[List[Dict]]]: + bedrock_messages = [] + system_bedrock_messages = [] + + user_contents = [] + assistant_contents = None + for message in messages: + role = message.role + content_list = ( + message.content + if isinstance(message.content, list) + else [message.content] + ) + if role == "ipython" or role == "user": + if not user_contents: + user_contents = [] + + if role == "ipython": + user_contents.extend( + [ + { + "toolResult": { + "toolUseId": message.call_id, + "content": [ + {"text": content} for content in content_list + ], + } + } + ] + ) + else: + user_contents.extend( + [{"text": content} for content in content_list] + ) + + if assistant_contents: + bedrock_messages.append( + {"role": "assistant", "content": assistant_contents} + ) + assistant_contents = None + elif role == "system": + system_bedrock_messages.extend( + [{"text": content} for content in content_list] + ) + elif role == "assistant": + if not assistant_contents: + assistant_contents = [] + + assistant_contents.extend( + [ + { + "text": content, + } + for content in content_list + ] + + [ + { + "toolUse": { + "input": tool_call.arguments, + "name": ( + tool_call.tool_name + if isinstance(tool_call.tool_name, str) + else tool_call.tool_name.value + ), + "toolUseId": tool_call.call_id, + } + } + for tool_call in message.tool_calls + ] + ) + + if user_contents: + bedrock_messages.append({"role": "user", "content": user_contents}) + user_contents = None + else: + # Unknown role + pass + + if user_contents: + bedrock_messages.append({"role": "user", "content": user_contents}) + if assistant_contents: + bedrock_messages.append( + {"role": "assistant", "content": assistant_contents} + ) + + if system_bedrock_messages: + return bedrock_messages, system_bedrock_messages + + return bedrock_messages, None + + @staticmethod + def get_bedrock_inference_config(sampling_params: Optional[SamplingParams]) -> Dict: + inference_config = {} + if sampling_params: + param_mapping = { + "max_tokens": "maxTokens", + "temperature": "temperature", + "top_p": "topP", + } + + for k, v in param_mapping.items(): + if getattr(sampling_params, k): + inference_config[v] = getattr(sampling_params, k) + + return inference_config + + @staticmethod + def _tool_parameters_to_input_schema( + tool_parameters: Optional[Dict[str, ToolParamDefinition]] + ) -> Dict: + input_schema = {"type": "object"} + if not tool_parameters: + return input_schema + + json_properties = {} + required = [] + for name, param in tool_parameters.items(): + json_property = { + "type": 
param.param_type, + } + + if param.description: + json_property["description"] = param.description + if param.required: + required.append(name) + json_properties[name] = json_property + + input_schema["properties"] = json_properties + if required: + input_schema["required"] = required + return input_schema + + @staticmethod + def _tools_to_tool_config( + tools: Optional[List[ToolDefinition]], tool_choice: Optional[ToolChoice] + ) -> Optional[Dict]: + if not tools: + return None + + bedrock_tools = [] + for tool in tools: + tool_name = ( + tool.tool_name + if isinstance(tool.tool_name, str) + else tool.tool_name.value + ) + + tool_spec = { + "toolSpec": { + "name": tool_name, + "inputSchema": { + "json": BedrockInferenceAdapter._tool_parameters_to_input_schema( + tool.parameters + ), + }, + } + } + + if tool.description: + tool_spec["toolSpec"]["description"] = tool.description + + bedrock_tools.append(tool_spec) + tool_config = { + "tools": bedrock_tools, + } + + if tool_choice: + tool_config["toolChoice"] = ( + {"any": {}} + if tool_choice.value == ToolChoice.required + else {"auto": {}} + ) + return tool_config + + async def chat_completion( + self, + model: str, + messages: List[Message], + sampling_params: Optional[SamplingParams] = SamplingParams(), + # zero-shot tool definitions as input to the model + tools: Optional[List[ToolDefinition]] = None, + tool_choice: Optional[ToolChoice] = ToolChoice.auto, + tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json, + stream: Optional[bool] = False, + logprobs: Optional[LogProbConfig] = None, + ) -> ( + AsyncGenerator + ): # Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: + bedrock_model = self.map_to_provider_model(model) + inference_config = BedrockInferenceAdapter.get_bedrock_inference_config( + sampling_params + ) + + tool_config = BedrockInferenceAdapter._tools_to_tool_config(tools, tool_choice) + bedrock_messages, system_bedrock_messages = ( + BedrockInferenceAdapter._messages_to_bedrock_messages(messages) + ) + + converse_api_params = { + "modelId": bedrock_model, + "messages": bedrock_messages, + } + if inference_config: + converse_api_params["inferenceConfig"] = inference_config + + # Tool use is not supported in streaming mode + if tool_config and not stream: + converse_api_params["toolConfig"] = tool_config + if system_bedrock_messages: + converse_api_params["system"] = system_bedrock_messages + + if not stream: + converse_api_res = self.client.converse(**converse_api_params) + + output_message = BedrockInferenceAdapter._bedrock_message_to_message( + converse_api_res + ) + + yield ChatCompletionResponse( + completion_message=output_message, + logprobs=None, + ) + else: + converse_stream_api_res = self.client.converse_stream(**converse_api_params) + event_stream = converse_stream_api_res["stream"] + + for chunk in event_stream: + if "messageStart" in chunk: + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.start, + delta="", + ) + ) + elif "contentBlockStart" in chunk: + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + content=ToolCall( + tool_name=chunk["contentBlockStart"]["toolUse"][ + "name" + ], + call_id=chunk["contentBlockStart"]["toolUse"][ + "toolUseId" + ], + ), + parse_status=ToolCallParseStatus.started, + ), + ) + ) + elif "contentBlockDelta" in chunk: + if "text" in chunk["contentBlockDelta"]["delta"]: + 
delta = chunk["contentBlockDelta"]["delta"]["text"] + else: + delta = ToolCallDelta( + content=ToolCall( + arguments=chunk["contentBlockDelta"]["delta"][ + "toolUse" + ]["input"] + ), + parse_status=ToolCallParseStatus.success, + ) + + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=delta, + ) + ) + elif "contentBlockStop" in chunk: + # Ignored + pass + elif "messageStop" in chunk: + stop_reason = ( + BedrockInferenceAdapter._bedrock_stop_reason_to_stop_reason( + chunk["messageStop"]["stopReason"] + ) + ) + + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.complete, + delta="", + stop_reason=stop_reason, + ) + ) + elif "metadata" in chunk: + # Ignored + pass + else: + # Ignored + pass diff --git a/llama_stack/providers/adapters/inference/fireworks/fireworks.py b/llama_stack/providers/adapters/inference/fireworks/fireworks.py index f6949cbdc..061e281be 100644 --- a/llama_stack/providers/adapters/inference/fireworks/fireworks.py +++ b/llama_stack/providers/adapters/inference/fireworks/fireworks.py @@ -13,7 +13,7 @@ from llama_models.llama3.api.chat_format import ChatFormat from llama_models.llama3.api.datatypes import Message, StopReason from llama_models.llama3.api.tokenizer import Tokenizer -from llama_stack.providers.utils.inference.routable import RoutableProviderForModels +from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from llama_stack.apis.inference import * # noqa: F403 from llama_stack.providers.utils.inference.augment_messages import ( @@ -30,9 +30,9 @@ FIREWORKS_SUPPORTED_MODELS = { } -class FireworksInferenceAdapter(Inference, RoutableProviderForModels): +class FireworksInferenceAdapter(ModelRegistryHelper, Inference): def __init__(self, config: FireworksImplConfig) -> None: - RoutableProviderForModels.__init__( + ModelRegistryHelper.__init__( self, stack_to_provider_models_map=FIREWORKS_SUPPORTED_MODELS ) self.config = config diff --git a/llama_stack/providers/adapters/inference/ollama/ollama.py b/llama_stack/providers/adapters/inference/ollama/ollama.py index bd267a5f8..bc1b3d103 100644 --- a/llama_stack/providers/adapters/inference/ollama/ollama.py +++ b/llama_stack/providers/adapters/inference/ollama/ollama.py @@ -18,7 +18,7 @@ from llama_stack.apis.inference import * # noqa: F403 from llama_stack.providers.utils.inference.augment_messages import ( augment_messages_for_tools, ) -from llama_stack.providers.utils.inference.routable import RoutableProviderForModels +from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper # TODO: Eventually this will move to the llama cli model list command # mapping of Model SKUs to ollama models @@ -27,12 +27,13 @@ OLLAMA_SUPPORTED_SKUS = { "Llama3.1-70B-Instruct": "llama3.1:70b-instruct-fp16", "Llama3.2-1B-Instruct": "llama3.2:1b-instruct-fp16", "Llama3.2-3B-Instruct": "llama3.2:3b-instruct-fp16", + "Llama-Guard-3-8B": "xe/llamaguard3:latest", } -class OllamaInferenceAdapter(Inference, RoutableProviderForModels): +class OllamaInferenceAdapter(ModelRegistryHelper, Inference): def __init__(self, url: str) -> None: - RoutableProviderForModels.__init__( + ModelRegistryHelper.__init__( self, stack_to_provider_models_map=OLLAMA_SUPPORTED_SKUS ) self.url = url diff --git a/llama_stack/providers/adapters/inference/together/together.py b/llama_stack/providers/adapters/inference/together/together.py index 
9f73a81d1..2ee90d8e3 100644 --- a/llama_stack/providers/adapters/inference/together/together.py +++ b/llama_stack/providers/adapters/inference/together/together.py @@ -18,7 +18,7 @@ from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.providers.utils.inference.augment_messages import ( augment_messages_for_tools, ) -from llama_stack.providers.utils.inference.routable import RoutableProviderForModels +from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from .config import TogetherImplConfig @@ -34,10 +34,10 @@ TOGETHER_SUPPORTED_MODELS = { class TogetherInferenceAdapter( - Inference, NeedsRequestProviderData, RoutableProviderForModels + ModelRegistryHelper, Inference, NeedsRequestProviderData ): def __init__(self, config: TogetherImplConfig) -> None: - RoutableProviderForModels.__init__( + ModelRegistryHelper.__init__( self, stack_to_provider_models_map=TOGETHER_SUPPORTED_MODELS ) self.config = config diff --git a/llama_stack/providers/impls/meta_reference/inference/inference.py b/llama_stack/providers/impls/meta_reference/inference/inference.py index dca4ea6fb..9c6654ad1 100644 --- a/llama_stack/providers/impls/meta_reference/inference/inference.py +++ b/llama_stack/providers/impls/meta_reference/inference/inference.py @@ -12,7 +12,6 @@ from llama_models.sku_list import resolve_model from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_stack.apis.inference import * # noqa: F403 -from llama_stack.distribution.datatypes import RoutableProvider from llama_stack.providers.utils.inference.augment_messages import ( augment_messages_for_tools, ) @@ -25,24 +24,39 @@ from .model_parallel import LlamaModelParallelGenerator SEMAPHORE = asyncio.Semaphore(1) -class MetaReferenceInferenceImpl(Inference, RoutableProvider): +class MetaReferenceInferenceImpl(Inference): def __init__(self, config: MetaReferenceImplConfig) -> None: self.config = config model = resolve_model(config.model) if model is None: raise RuntimeError(f"Unknown model: {config.model}, Run `llama model list`") self.model = model + self.registered_model_defs = [] # verify that the checkpoint actually is for this model lol async def initialize(self) -> None: self.generator = LlamaModelParallelGenerator(self.config) self.generator.start() - async def validate_routing_keys(self, routing_keys: List[str]) -> None: - assert ( - len(routing_keys) == 1 - ), f"Only one routing key is supported {routing_keys}" - assert routing_keys[0] == self.config.model + async def register_model(self, model: ModelDef) -> None: + existing = await self.get_model(model.identifier) + if existing is not None: + return + + if model.identifier != self.model.descriptor(): + raise RuntimeError( + f"Model mismatch: {model.identifier} != {self.model.descriptor()}" + ) + self.registered_model_defs = [model] + + async def list_models(self) -> List[ModelDef]: + return self.registered_model_defs + + async def get_model(self, identifier: str) -> Optional[ModelDef]: + for model in self.registered_model_defs: + if model.identifier == identifier: + return model + return None async def shutdown(self) -> None: self.generator.stop() diff --git a/llama_stack/providers/impls/meta_reference/memory/faiss.py b/llama_stack/providers/impls/meta_reference/memory/faiss.py index 4f592e5e0..1534971cd 100644 --- a/llama_stack/providers/impls/meta_reference/memory/faiss.py +++ b/llama_stack/providers/impls/meta_reference/memory/faiss.py @@ -13,7 +13,6 @@ import numpy as np from numpy.typing import 
NDArray from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_stack.distribution.datatypes import RoutableProvider from llama_stack.apis.memory import * # noqa: F403 from llama_stack.providers.utils.memory.vector_store import ( @@ -62,7 +61,7 @@ class FaissIndex(EmbeddingIndex): return QueryDocumentsResponse(chunks=chunks, scores=scores) -class FaissMemoryImpl(Memory, RoutableProvider): +class FaissMemoryImpl(Memory): def __init__(self, config: FaissImplConfig) -> None: self.config = config self.cache = {} @@ -83,7 +82,6 @@ class FaissMemoryImpl(Memory, RoutableProvider): bank=memory_bank, index=FaissIndex(ALL_MINILM_L6_V2_DIMENSION) ) self.cache[memory_bank.identifier] = index - return bank async def get_memory_bank(self, identifier: str) -> Optional[MemoryBankDef]: index = self.cache.get(identifier) diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py new file mode 100644 index 000000000..dabf698d4 --- /dev/null +++ b/llama_stack/providers/utils/inference/model_registry.py @@ -0,0 +1,51 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import Dict, List + +from llama_models.sku_list import resolve_model + +from llama_stack.apis.models import * # noqa: F403 + + +class ModelRegistryHelper: + + def __init__(self, stack_to_provider_models_map: Dict[str, str]): + self.stack_to_provider_models_map = stack_to_provider_models_map + self.registered_models = [] + + def map_to_provider_model(self, identifier: str) -> str: + model = resolve_model(identifier) + if not model: + raise ValueError(f"Unknown model: `{identifier}`") + + if identifier not in self.stack_to_provider_models_map: + raise ValueError( + f"Model {identifier} not found in map {self.stack_to_provider_models_map}" + ) + + return self.stack_to_provider_models_map[identifier] + + async def register_model(self, model: ModelDef) -> None: + existing = await self.get_model(model.identifier) + if existing is not None: + return + + if model.identifier not in self.stack_to_provider_models_map: + raise ValueError( + f"Unsupported model {model.identifier}. Supported models: {self.stack_to_provider_models_map.keys()}" + ) + + self.registered_models.append(model) + + async def list_models(self) -> List[ModelDef]: + return self.registered_models + + async def get_model(self, identifier: str) -> Optional[ModelDef]: + for model in self.registered_models: + if model.identifier == identifier: + return model + return None diff --git a/llama_stack/providers/utils/inference/routable.py b/llama_stack/providers/utils/inference/routable.py deleted file mode 100644 index a36631208..000000000 --- a/llama_stack/providers/utils/inference/routable.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from typing import Dict, List - -from llama_models.sku_list import resolve_model - -from llama_stack.distribution.datatypes import RoutableProvider - - -class RoutableProviderForModels(RoutableProvider): - - def __init__(self, stack_to_provider_models_map: Dict[str, str]): - self.stack_to_provider_models_map = stack_to_provider_models_map - - async def validate_routing_keys(self, routing_keys: List[str]): - for routing_key in routing_keys: - if routing_key not in self.stack_to_provider_models_map: - raise ValueError( - f"Routing key {routing_key} not found in map {self.stack_to_provider_models_map}" - ) - - def map_to_provider_model(self, routing_key: str) -> str: - model = resolve_model(routing_key) - if not model: - raise ValueError(f"Unknown model: `{routing_key}`") - - if routing_key not in self.stack_to_provider_models_map: - raise ValueError( - f"Model {routing_key} not found in map {self.stack_to_provider_models_map}" - ) - - return self.stack_to_provider_models_map[routing_key] From 60dead6196dd1157bece6f6b2b4699b92632e6ca Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sat, 5 Oct 2024 23:16:11 -0700 Subject: [PATCH 38/69] apis_to_serve -> apis --- llama_stack/cli/stack/configure.py | 2 +- llama_stack/cli/stack/run.py | 2 + llama_stack/distribution/configure.py | 9 ++- llama_stack/distribution/datatypes.py | 64 ++++++++----------- llama_stack/distribution/resolver.py | 2 +- llama_stack/distribution/server/server.py | 4 +- .../adapters/inference/ollama/ollama.py | 3 +- 7 files changed, 38 insertions(+), 48 deletions(-) diff --git a/llama_stack/cli/stack/configure.py b/llama_stack/cli/stack/configure.py index 76ade470e..9aa7e2f6e 100644 --- a/llama_stack/cli/stack/configure.py +++ b/llama_stack/cli/stack/configure.py @@ -154,7 +154,7 @@ class StackConfigure(Subcommand): config = StackRunConfig( built_at=datetime.now(), image_name=image_name, - apis_to_serve=[], + apis=[], providers={}, models=[], shields=[], diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py index 1c528baed..033b2a81f 100644 --- a/llama_stack/cli/stack/run.py +++ b/llama_stack/cli/stack/run.py @@ -46,6 +46,7 @@ class StackRun(Subcommand): import pkg_resources import yaml + from termcolor import cprint from llama_stack.distribution.build import ImageType from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR @@ -75,6 +76,7 @@ class StackRun(Subcommand): ) return + cprint(f"Using config `{config_file}`", "green") with open(config_file, "r") as f: config = StackRunConfig(**yaml.safe_load(f)) diff --git a/llama_stack/distribution/configure.py b/llama_stack/distribution/configure.py index b40cff242..f343c13bb 100644 --- a/llama_stack/distribution/configure.py +++ b/llama_stack/distribution/configure.py @@ -64,8 +64,8 @@ def configure_api_providers( ) -> StackRunConfig: is_nux = len(config.providers) == 0 - apis = set((config.apis_to_serve or list(build_spec.providers.keys()))) - config.apis_to_serve = [a for a in apis if a != "telemetry"] + apis = set((config.apis or list(build_spec.providers.keys()))) + config.apis = [a for a in apis if a != "telemetry"] if is_nux: print( @@ -79,7 +79,7 @@ def configure_api_providers( provider_registry = get_provider_registry() builtin_apis = [a.routing_table_api for a in builtin_automatically_routed_apis()] - for api_str in config.apis_to_serve: + for api_str in config.apis: api = Api(api_str) if api in builtin_apis: continue @@ -342,6 +342,9 @@ def upgrade_from_routing_table_to_registry( del config_dict["routing_table"] del 
config_dict["api_providers"] + config_dict["apis"] = config_dict["apis_to_serve"] + del config_dict["apis_to_serve"] + return config_dict diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index 05b2ad0d6..c987d4c87 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -39,15 +39,6 @@ RoutedProtocol = Union[ ] -class GenericProviderConfig(BaseModel): - provider_type: str - config: Dict[str, Any] - - -class RoutableProviderConfig(GenericProviderConfig): - routing_key: RoutingKey - - # Example: /inference, /safety class AutoRoutedProviderSpec(ProviderSpec): provider_type: str = "router" @@ -92,7 +83,6 @@ in the runtime configuration to help route to the correct provider.""", ) -# TODO: rename as ProviderInstanceConfig class Provider(BaseModel): provider_id: str provider_type: str @@ -118,40 +108,36 @@ this could be just a hash default=None, description="Reference to the conda environment if this package refers to a conda environment", ) - apis_to_serve: List[str] = Field( + apis: List[str] = Field( description=""" The list of APIs to serve. If not specified, all APIs specified in the provider_map will be served""", ) - providers: Dict[str, List[Provider]] + providers: Dict[str, List[Provider]] = Field( + description=""" +One or more providers to use for each API. The same provider_type (e.g., meta-reference) +can be instantiated multiple times (with different configs) if necessary. +""", + ) - models: List[ModelDef] - shields: List[ShieldDef] - memory_banks: List[MemoryBankDef] - - -# api_providers: Dict[ -# str, Union[GenericProviderConfig, PlaceholderProviderConfig] -# ] = Field( -# description=""" -# Provider configurations for each of the APIs provided by this package. -# """, -# ) -# routing_table: Dict[str, List[RoutableProviderConfig]] = Field( -# default_factory=dict, -# description=""" - -# E.g. The following is a ProviderRoutingEntry for models: -# - routing_key: Llama3.1-8B-Instruct -# provider_type: meta-reference -# config: -# model: Llama3.1-8B-Instruct -# quantization: null -# torch_seed: null -# max_seq_len: 4096 -# max_batch_size: 1 -# """, -# ) + models: List[ModelDef] = Field( + description=""" +List of model definitions to serve. This list may get extended by +/models/register API calls at runtime. +""", + ) + shields: List[ShieldDef] = Field( + description=""" +List of shield definitions to serve. This list may get extended by +/shields/register API calls at runtime. +""", + ) + memory_banks: List[MemoryBankDef] = Field( + description=""" +List of memory bank definitions to serve. This list may get extended by +/memory_banks/register API calls at runtime. 
+""", + ) class BuildConfig(BaseModel): diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index 2c383587c..d0c3adb84 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -59,7 +59,7 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An key = api_str if api not in router_apis else f"inner-{api_str}" providers_with_specs[key] = specs - apis_to_serve = run_config.apis_to_serve or set( + apis_to_serve = run_config.apis or set( list(providers_with_specs.keys()) + list(routing_table_apis) ) diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index f664bb674..ed3b4b9f2 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -291,8 +291,8 @@ def main( all_endpoints = get_all_api_endpoints() - if config.apis_to_serve: - apis_to_serve = set(config.apis_to_serve) + if config.apis: + apis_to_serve = set(config.apis) else: apis_to_serve = set(impls.keys()) diff --git a/llama_stack/providers/adapters/inference/ollama/ollama.py b/llama_stack/providers/adapters/inference/ollama/ollama.py index bc1b3d103..aa9a25658 100644 --- a/llama_stack/providers/adapters/inference/ollama/ollama.py +++ b/llama_stack/providers/adapters/inference/ollama/ollama.py @@ -20,8 +20,7 @@ from llama_stack.providers.utils.inference.augment_messages import ( ) from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper -# TODO: Eventually this will move to the llama cli model list command -# mapping of Model SKUs to ollama models + OLLAMA_SUPPORTED_SKUS = { "Llama3.1-8B-Instruct": "llama3.1:8b-instruct-fp16", "Llama3.1-70B-Instruct": "llama3.1:70b-instruct-fp16", From e45a4175433bfff7513ebb006cf1b8f9e168f258 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sat, 5 Oct 2024 23:48:18 -0700 Subject: [PATCH 39/69] more fixes, plug shutdown handlers still, FastAPIs sigint handler is not calling ours --- .../distribution/routers/routing_tables.py | 3 ++- llama_stack/distribution/server/server.py | 19 +++++++++++++++++-- .../providers/adapters/inference/tgi/tgi.py | 16 ++++++++++------ llama_stack/providers/impls/vllm/vllm.py | 6 +++--- 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index 350ab05fe..e51534446 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -42,7 +42,8 @@ class CommonRoutingTableImpl(RoutingTable): await self.register_object(obj, p) async def shutdown(self) -> None: - pass + for p in self.impls_by_provider_id.values(): + await p.shutdown() def get_provider_impl(self, routing_key: str) -> Any: if routing_key not in self.routing_key_to_object: diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index ed3b4b9f2..dd3fafd0a 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -5,6 +5,7 @@ # the root directory of this source tree. import asyncio +import functools import inspect import json import signal @@ -169,11 +170,20 @@ async def passthrough( await end_trace(SpanStatus.OK if not erred else SpanStatus.ERROR) -def handle_sigint(*args, **kwargs): +def handle_sigint(app, *args, **kwargs): print("SIGINT or CTRL-C detected. 
Exiting gracefully...") + + async def run_shutdown(): + for impl in app.__llama_stack_impls__.values(): + print(f"Shutting down {impl}") + await impl.shutdown() + + asyncio.run(run_shutdown()) + loop = asyncio.get_event_loop() for task in asyncio.all_tasks(loop): task.cancel() + loop.stop() @@ -181,7 +191,10 @@ def handle_sigint(*args, **kwargs): async def lifespan(app: FastAPI): print("Starting up") yield + print("Shutting down") + for impl in app.__llama_stack_impls__.values(): + await impl.shutdown() def create_dynamic_passthrough( @@ -333,7 +346,9 @@ def main( print("") app.exception_handler(RequestValidationError)(global_exception_handler) app.exception_handler(Exception)(global_exception_handler) - signal.signal(signal.SIGINT, handle_sigint) + signal.signal(signal.SIGINT, functools.partial(handle_sigint, app)) + + app.__llama_stack_impls__ = impls import uvicorn diff --git a/llama_stack/providers/adapters/inference/tgi/tgi.py b/llama_stack/providers/adapters/inference/tgi/tgi.py index a5e5a99be..9868a9364 100644 --- a/llama_stack/providers/adapters/inference/tgi/tgi.py +++ b/llama_stack/providers/adapters/inference/tgi/tgi.py @@ -13,8 +13,6 @@ from llama_models.llama3.api.chat_format import ChatFormat from llama_models.llama3.api.datatypes import StopReason from llama_models.llama3.api.tokenizer import Tokenizer -from llama_stack.distribution.datatypes import RoutableProvider - from llama_stack.apis.inference import * # noqa: F403 from llama_stack.providers.utils.inference.augment_messages import ( augment_messages_for_tools, @@ -25,7 +23,7 @@ from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImpl logger = logging.getLogger(__name__) -class _HfAdapter(Inference, RoutableProvider): +class _HfAdapter(Inference): client: AsyncInferenceClient max_tokens: int model_id: str @@ -34,11 +32,17 @@ class _HfAdapter(Inference, RoutableProvider): self.tokenizer = Tokenizer.get_instance() self.formatter = ChatFormat(self.tokenizer) - async def validate_routing_keys(self, routing_keys: list[str]) -> None: - # these are the model names the Llama Stack will use to route requests to this provider - # perform validation here if necessary + # TODO: make this work properly by checking this against the model_id being + # served by the remote endpoint + async def register_model(self, model: ModelDef) -> None: pass + async def list_models(self) -> List[ModelDef]: + return [] + + async def get_model(self, identifier: str) -> Optional[ModelDef]: + return None + async def shutdown(self) -> None: pass diff --git a/llama_stack/providers/impls/vllm/vllm.py b/llama_stack/providers/impls/vllm/vllm.py index ecaa6bc45..0f8e8d38c 100644 --- a/llama_stack/providers/impls/vllm/vllm.py +++ b/llama_stack/providers/impls/vllm/vllm.py @@ -42,7 +42,7 @@ from llama_stack.apis.inference.inference import ( from llama_stack.providers.utils.inference.augment_messages import ( augment_messages_for_tools, ) -from llama_stack.providers.utils.inference.routable import RoutableProviderForModels +from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from .config import VLLMConfig @@ -75,7 +75,7 @@ def _vllm_sampling_params(sampling_params: Any) -> SamplingParams: return SamplingParams().from_optional(**kwargs) -class VLLMInferenceImpl(Inference, RoutableProviderForModels): +class VLLMInferenceImpl(Inference, ModelRegistryHelper): """Inference implementation for vLLM.""" HF_MODEL_MAPPINGS = { @@ -109,7 +109,7 @@ class VLLMInferenceImpl(Inference, RoutableProviderForModels): def 
__init__(self, config: VLLMConfig): Inference.__init__(self) - RoutableProviderForModels.__init__( + ModelRegistryHelper.__init__( self, stack_to_provider_models_map=self.HF_MODEL_MAPPINGS, ) From 91e00635931e609e51389628b7e193144eba0edd Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sun, 6 Oct 2024 16:29:33 -0700 Subject: [PATCH 40/69] Introduce model_store, shield_store, memory_bank_store --- llama_stack/apis/inference/inference.py | 12 +-- llama_stack/apis/memory/memory.py | 12 +-- llama_stack/apis/safety/safety.py | 12 +-- llama_stack/distribution/datatypes.py | 8 ++ llama_stack/distribution/resolver.py | 1 + .../distribution/routers/routing_tables.py | 44 +++++--- .../inference/databricks/databricks.py | 39 +++---- .../adapters/inference/ollama/ollama.py | 4 +- .../adapters/inference/sample/sample.py | 6 +- .../providers/adapters/inference/tgi/tgi.py | 17 +-- .../adapters/memory/chroma/chroma.py | 47 +++----- .../adapters/memory/pgvector/pgvector.py | 90 +++------------- .../adapters/memory/sample/sample.py | 6 +- .../adapters/safety/bedrock/bedrock.py | 102 +++++++++--------- .../adapters/safety/sample/sample.py | 6 +- .../adapters/safety/together/together.py | 22 ++-- .../impls/meta_reference/memory/faiss.py | 9 -- .../impls/meta_reference/safety/safety.py | 14 +-- .../utils/inference/model_registry.py | 18 +--- 19 files changed, 172 insertions(+), 297 deletions(-) diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index 5374f2efb..7ff70a2af 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -173,7 +173,13 @@ class EmbeddingsResponse(BaseModel): embeddings: List[List[float]] +class ModelStore(Protocol): + def get_model(self, identifier: str) -> ModelDef: ... + + class Inference(Protocol): + model_store: ModelStore + @webmethod(route="/inference/completion") async def completion( self, @@ -207,9 +213,3 @@ class Inference(Protocol): @webmethod(route="/inference/register_model") async def register_model(self, model: ModelDef) -> None: ... - - @webmethod(route="/inference/list_models") - async def list_models(self) -> List[ModelDef]: ... - - @webmethod(route="/inference/get_model") - async def get_model(self, identifier: str) -> Optional[ModelDef]: ... diff --git a/llama_stack/apis/memory/memory.py b/llama_stack/apis/memory/memory.py index 86dcbbcdc..c5161e864 100644 --- a/llama_stack/apis/memory/memory.py +++ b/llama_stack/apis/memory/memory.py @@ -38,7 +38,13 @@ class QueryDocumentsResponse(BaseModel): scores: List[float] +class MemoryBankStore(Protocol): + def get_memory_bank(self, bank_id: str) -> Optional[MemoryBankDef]: ... + + class Memory(Protocol): + memory_bank_store: MemoryBankStore + # this will just block now until documents are inserted, but it should # probably return a Job instance which can be polled for completion @webmethod(route="/memory/insert") @@ -80,9 +86,3 @@ class Memory(Protocol): @webmethod(route="/memory/register_memory_bank") async def register_memory_bank(self, memory_bank: MemoryBankDef) -> None: ... - - @webmethod(route="/memory/list_memory_banks") - async def list_memory_banks(self) -> List[MemoryBankDef]: ... - - @webmethod(route="/memory/get_memory_bank") - async def get_memory_bank(self, identifier: str) -> Optional[MemoryBankDef]: ... 
diff --git a/llama_stack/apis/safety/safety.py b/llama_stack/apis/safety/safety.py index a3c94d136..4f4a49407 100644 --- a/llama_stack/apis/safety/safety.py +++ b/llama_stack/apis/safety/safety.py @@ -38,7 +38,13 @@ class RunShieldResponse(BaseModel): violation: Optional[SafetyViolation] = None +class ShieldStore(Protocol): + def get_shield(self, identifier: str) -> ShieldDef: ... + + class Safety(Protocol): + shield_store: ShieldStore + @webmethod(route="/safety/run_shield") async def run_shield( self, shield_type: str, messages: List[Message], params: Dict[str, Any] = None @@ -46,9 +52,3 @@ class Safety(Protocol): @webmethod(route="/safety/register_shield") async def register_shield(self, shield: ShieldDef) -> None: ... - - @webmethod(route="/safety/list_shields") - async def list_shields(self) -> List[ShieldDef]: ... - - @webmethod(route="/safety/get_shield") - async def get_shield(self, identifier: str) -> Optional[ShieldDef]: ... diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index c987d4c87..f08eec462 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -39,6 +39,14 @@ RoutedProtocol = Union[ ] +class ModelRegistry(Protocol): + def get_model(self, identifier: str) -> ModelDef: ... + + +class MemoryBankRegistry(Protocol): + def get_memory_bank(self, identifier: str) -> MemoryBankDef: ... + + # Example: /inference, /safety class AutoRoutedProviderSpec(ProviderSpec): provider_type: str = "router" diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index d0c3adb84..0adb42915 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -151,6 +151,7 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An deps, inner_impls, ) + # TODO: ugh slightly redesign this shady looking code if "inner-" in api_str: inner_impls_by_provider_id[api_str][provider.provider_id] = impl else: diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index e51534446..ef38b6391 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -15,6 +15,20 @@ from llama_stack.apis.memory_banks import * # noqa: F403 from llama_stack.distribution.datatypes import * # noqa: F403 +def get_impl_api(p: Any) -> Api: + return p.__provider_spec__.api + + +async def register_object_with_provider(obj: RoutableObject, p: Any) -> None: + api = get_impl_api(p) + if api == Api.inference: + await p.register_model(obj) + elif api == Api.safety: + await p.register_shield(obj) + elif api == Api.memory: + await p.register_memory_bank(obj) + + # TODO: this routing table maintains state in memory purely. We need to # add persistence to it when we add dynamic registration of objects. 
class CommonRoutingTableImpl(RoutingTable): @@ -32,6 +46,15 @@ class CommonRoutingTableImpl(RoutingTable): self.impls_by_provider_id = impls_by_provider_id self.registry = registry + for p in self.impls_by_provider_id.values(): + api = get_impl_api(p) + if api == Api.inference: + p.model_store = self + elif api == Api.safety: + p.shield_store = self + elif api == Api.memory: + p.memory_bank_store = self + self.routing_key_to_object = {} for obj in self.registry: self.routing_key_to_object[obj.identifier] = obj @@ -39,7 +62,7 @@ class CommonRoutingTableImpl(RoutingTable): async def initialize(self) -> None: for obj in self.registry: p = self.impls_by_provider_id[obj.provider_id] - await self.register_object(obj, p) + await register_object_with_provider(obj, p) async def shutdown(self) -> None: for p in self.impls_by_provider_id.values(): @@ -57,7 +80,7 @@ class CommonRoutingTableImpl(RoutingTable): return obj return None - async def register_object_common(self, obj: RoutableObject) -> None: + async def register_object(self, obj: RoutableObject) -> Any: if obj.identifier in self.routing_key_to_object: raise ValueError(f"Object `{obj.identifier}` already registered") @@ -65,16 +88,13 @@ class CommonRoutingTableImpl(RoutingTable): raise ValueError(f"Provider `{obj.provider_id}` not found") p = self.impls_by_provider_id[obj.provider_id] - await p.register_object(obj) + await register_object_with_provider(obj, p) self.routing_key_to_object[obj.identifier] = obj self.registry.append(obj) class ModelsRoutingTable(CommonRoutingTableImpl, Models): - async def register_object(self, obj: ModelDef, p: Inference) -> None: - await p.register_model(obj) - async def list_models(self) -> List[ModelDef]: return self.registry @@ -82,13 +102,10 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models): return self.get_object_by_identifier(identifier) async def register_model(self, model: ModelDef) -> None: - await self.register_object_common(model) + await self.register_object(model) class ShieldsRoutingTable(CommonRoutingTableImpl, Shields): - async def register_object(self, obj: ShieldDef, p: Safety) -> None: - await p.register_shield(obj) - async def list_shields(self) -> List[ShieldDef]: return self.registry @@ -96,13 +113,10 @@ class ShieldsRoutingTable(CommonRoutingTableImpl, Shields): return self.get_object_by_identifier(shield_type) async def register_shield(self, shield: ShieldDef) -> None: - await self.register_object_common(shield) + await self.register_object(shield) class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks): - async def register_object(self, obj: MemoryBankDef, p: Memory) -> None: - await p.register_memory_bank(obj) - async def list_memory_banks(self) -> List[MemoryBankDef]: return self.registry @@ -110,4 +124,4 @@ class MemoryBanksRoutingTable(CommonRoutingTableImpl, MemoryBanks): return self.get_object_by_identifier(identifier) async def register_memory_bank(self, bank: MemoryBankDef) -> None: - await self.register_object_common(bank) + await self.register_object(bank) diff --git a/llama_stack/providers/adapters/inference/databricks/databricks.py b/llama_stack/providers/adapters/inference/databricks/databricks.py index eeffb938d..6d106ccf1 100644 --- a/llama_stack/providers/adapters/inference/databricks/databricks.py +++ b/llama_stack/providers/adapters/inference/databricks/databricks.py @@ -6,39 +6,40 @@ from typing import AsyncGenerator -from openai import OpenAI - from llama_models.llama3.api.chat_format import ChatFormat from llama_models.llama3.api.datatypes 
import Message, StopReason from llama_models.llama3.api.tokenizer import Tokenizer -from llama_models.sku_list import resolve_model + +from openai import OpenAI from llama_stack.apis.inference import * # noqa: F403 from llama_stack.providers.utils.inference.augment_messages import ( augment_messages_for_tools, ) +from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from .config import DatabricksImplConfig + DATABRICKS_SUPPORTED_MODELS = { "Llama3.1-70B-Instruct": "databricks-meta-llama-3-1-70b-instruct", "Llama3.1-405B-Instruct": "databricks-meta-llama-3-1-405b-instruct", } -class DatabricksInferenceAdapter(Inference): +class DatabricksInferenceAdapter(ModelRegistryHelper, Inference): def __init__(self, config: DatabricksImplConfig) -> None: + ModelRegistryHelper.__init__( + self, stack_to_provider_models_map=DATABRICKS_SUPPORTED_MODELS + ) self.config = config tokenizer = Tokenizer.get_instance() self.formatter = ChatFormat(tokenizer) @property def client(self) -> OpenAI: - return OpenAI( - base_url=self.config.url, - api_key=self.config.api_token - ) + return OpenAI(base_url=self.config.url, api_key=self.config.api_token) async def initialize(self) -> None: return @@ -65,18 +66,6 @@ class DatabricksInferenceAdapter(Inference): return databricks_messages - def resolve_databricks_model(self, model_name: str) -> str: - model = resolve_model(model_name) - assert ( - model is not None - and model.descriptor(shorten_default_variant=True) - in DATABRICKS_SUPPORTED_MODELS - ), f"Unsupported model: {model_name}, use one of the supported models: {','.join(DATABRICKS_SUPPORTED_MODELS.keys())}" - - return DATABRICKS_SUPPORTED_MODELS.get( - model.descriptor(shorten_default_variant=True) - ) - def get_databricks_chat_options(self, request: ChatCompletionRequest) -> dict: options = {} if request.sampling_params is not None: @@ -110,10 +99,9 @@ class DatabricksInferenceAdapter(Inference): messages = augment_messages_for_tools(request) options = self.get_databricks_chat_options(request) - databricks_model = self.resolve_databricks_model(request.model) + databricks_model = self.map_to_provider_model(request.model) if not request.stream: - r = self.client.chat.completions.create( model=databricks_model, messages=self._messages_to_databricks_messages(messages), @@ -154,10 +142,7 @@ class DatabricksInferenceAdapter(Inference): **options, ): if chunk.choices[0].finish_reason: - if ( - stop_reason is None - and chunk.choices[0].finish_reason == "stop" - ): + if stop_reason is None and chunk.choices[0].finish_reason == "stop": stop_reason = StopReason.end_of_turn elif ( stop_reason is None @@ -254,4 +239,4 @@ class DatabricksInferenceAdapter(Inference): delta="", stop_reason=stop_reason, ) - ) \ No newline at end of file + ) diff --git a/llama_stack/providers/adapters/inference/ollama/ollama.py b/llama_stack/providers/adapters/inference/ollama/ollama.py index aa9a25658..09af46b11 100644 --- a/llama_stack/providers/adapters/inference/ollama/ollama.py +++ b/llama_stack/providers/adapters/inference/ollama/ollama.py @@ -21,7 +21,7 @@ from llama_stack.providers.utils.inference.augment_messages import ( from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper -OLLAMA_SUPPORTED_SKUS = { +OLLAMA_SUPPORTED_MODELS = { "Llama3.1-8B-Instruct": "llama3.1:8b-instruct-fp16", "Llama3.1-70B-Instruct": "llama3.1:70b-instruct-fp16", "Llama3.2-1B-Instruct": "llama3.2:1b-instruct-fp16", @@ -33,7 +33,7 @@ OLLAMA_SUPPORTED_SKUS = { class 
OllamaInferenceAdapter(ModelRegistryHelper, Inference): def __init__(self, url: str) -> None: ModelRegistryHelper.__init__( - self, stack_to_provider_models_map=OLLAMA_SUPPORTED_SKUS + self, stack_to_provider_models_map=OLLAMA_SUPPORTED_MODELS ) self.url = url tokenizer = Tokenizer.get_instance() diff --git a/llama_stack/providers/adapters/inference/sample/sample.py b/llama_stack/providers/adapters/inference/sample/sample.py index 7d4e4a837..09171e395 100644 --- a/llama_stack/providers/adapters/inference/sample/sample.py +++ b/llama_stack/providers/adapters/inference/sample/sample.py @@ -9,14 +9,12 @@ from .config import SampleConfig from llama_stack.apis.inference import * # noqa: F403 -from llama_stack.distribution.datatypes import RoutableProvider - -class SampleInferenceImpl(Inference, RoutableProvider): +class SampleInferenceImpl(Inference): def __init__(self, config: SampleConfig): self.config = config - async def validate_routing_keys(self, routing_keys: list[str]) -> None: + async def register_model(self, model: ModelDef) -> None: # these are the model names the Llama Stack will use to route requests to this provider # perform validation here if necessary pass diff --git a/llama_stack/providers/adapters/inference/tgi/tgi.py b/llama_stack/providers/adapters/inference/tgi/tgi.py index 9868a9364..538c11ec7 100644 --- a/llama_stack/providers/adapters/inference/tgi/tgi.py +++ b/llama_stack/providers/adapters/inference/tgi/tgi.py @@ -12,6 +12,7 @@ from huggingface_hub import AsyncInferenceClient, HfApi from llama_models.llama3.api.chat_format import ChatFormat from llama_models.llama3.api.datatypes import StopReason from llama_models.llama3.api.tokenizer import Tokenizer +from llama_models.sku_list import resolve_model from llama_stack.apis.inference import * # noqa: F403 from llama_stack.providers.utils.inference.augment_messages import ( @@ -32,16 +33,18 @@ class _HfAdapter(Inference): self.tokenizer = Tokenizer.get_instance() self.formatter = ChatFormat(self.tokenizer) - # TODO: make this work properly by checking this against the model_id being - # served by the remote endpoint async def register_model(self, model: ModelDef) -> None: - pass + resolved_model = resolve_model(model.identifier) + if resolved_model is None: + raise ValueError(f"Unknown model: {model.identifier}") - async def list_models(self) -> List[ModelDef]: - return [] + if not resolved_model.huggingface_repo: + raise ValueError( + f"Model {model.identifier} does not have a HuggingFace repo" + ) - async def get_model(self, identifier: str) -> Optional[ModelDef]: - return None + if self.model_id != resolved_model.huggingface_repo: + raise ValueError(f"Model mismatch: {model.identifier} != {self.model_id}") async def shutdown(self) -> None: pass diff --git a/llama_stack/providers/adapters/memory/chroma/chroma.py b/llama_stack/providers/adapters/memory/chroma/chroma.py index afa13111f..f720159a5 100644 --- a/llama_stack/providers/adapters/memory/chroma/chroma.py +++ b/llama_stack/providers/adapters/memory/chroma/chroma.py @@ -5,7 +5,6 @@ # the root directory of this source tree. 
import json -import uuid from typing import List from urllib.parse import urlparse @@ -13,7 +12,6 @@ import chromadb from numpy.typing import NDArray from llama_stack.apis.memory import * # noqa: F403 -from llama_stack.distribution.datatypes import RoutableProvider from llama_stack.providers.utils.memory.vector_store import ( BankWithIndex, @@ -65,7 +63,7 @@ class ChromaIndex(EmbeddingIndex): return QueryDocumentsResponse(chunks=chunks, scores=scores) -class ChromaMemoryAdapter(Memory, RoutableProvider): +class ChromaMemoryAdapter(Memory): def __init__(self, url: str) -> None: print(f"Initializing ChromaMemoryAdapter with url: {url}") url = url.rstrip("/") @@ -93,48 +91,33 @@ class ChromaMemoryAdapter(Memory, RoutableProvider): async def shutdown(self) -> None: pass - async def validate_routing_keys(self, routing_keys: List[str]) -> None: - print(f"[chroma] Registering memory bank routing keys: {routing_keys}") - pass - - async def create_memory_bank( + async def register_memory_bank( self, - name: str, - config: MemoryBankConfig, - url: Optional[URL] = None, - ) -> MemoryBank: - bank_id = str(uuid.uuid4()) - bank = MemoryBank( - bank_id=bank_id, - name=name, - config=config, - url=url, - ) + memory_bank: MemoryBankDef, + ) -> None: + assert ( + memory_bank.type == MemoryBankType.vector.value + ), f"Only vector banks are supported {memory_bank.type}" + collection = await self.client.create_collection( - name=bank_id, - metadata={"bank": bank.json()}, + name=memory_bank.identifier, ) bank_index = BankWithIndex( - bank=bank, index=ChromaIndex(self.client, collection) + bank=memory_bank, index=ChromaIndex(self.client, collection) ) - self.cache[bank_id] = bank_index - return bank - - async def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]: - bank_index = await self._get_and_cache_bank_index(bank_id) - if bank_index is None: - return None - return bank_index.bank + self.cache[memory_bank.identifier] = bank_index async def _get_and_cache_bank_index(self, bank_id: str) -> Optional[BankWithIndex]: if bank_id in self.cache: return self.cache[bank_id] + bank = await self.memory_bank_store.get_memory_bank(bank_id) + if bank is None: + raise ValueError(f"Bank {bank_id} not found") + collections = await self.client.list_collections() for collection in collections: if collection.name == bank_id: - print(collection.metadata) - bank = MemoryBank(**json.loads(collection.metadata["bank"])) index = BankWithIndex( bank=bank, index=ChromaIndex(self.client, collection), diff --git a/llama_stack/providers/adapters/memory/pgvector/pgvector.py b/llama_stack/providers/adapters/memory/pgvector/pgvector.py index 5864aa7dc..c5dc1f4be 100644 --- a/llama_stack/providers/adapters/memory/pgvector/pgvector.py +++ b/llama_stack/providers/adapters/memory/pgvector/pgvector.py @@ -4,18 +4,14 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-import uuid -from typing import List, Tuple +from typing import List import psycopg2 from numpy.typing import NDArray from psycopg2 import sql from psycopg2.extras import execute_values, Json -from pydantic import BaseModel - from llama_stack.apis.memory import * # noqa: F403 -from llama_stack.distribution.datatypes import RoutableProvider from llama_stack.providers.utils.memory.vector_store import ( ALL_MINILM_L6_V2_DIMENSION, @@ -32,33 +28,6 @@ def check_extension_version(cur): return result[0] if result else None -def upsert_models(cur, keys_models: List[Tuple[str, BaseModel]]): - query = sql.SQL( - """ - INSERT INTO metadata_store (key, data) - VALUES %s - ON CONFLICT (key) DO UPDATE - SET data = EXCLUDED.data - """ - ) - - values = [(key, Json(model.dict())) for key, model in keys_models] - execute_values(cur, query, values, template="(%s, %s)") - - -def load_models(cur, keys: List[str], cls): - query = "SELECT key, data FROM metadata_store" - if keys: - placeholders = ",".join(["%s"] * len(keys)) - query += f" WHERE key IN ({placeholders})" - cur.execute(query, keys) - else: - cur.execute(query) - - rows = cur.fetchall() - return [cls(**row["data"]) for row in rows] - - class PGVectorIndex(EmbeddingIndex): def __init__(self, bank: MemoryBank, dimension: int, cursor): self.cursor = cursor @@ -119,7 +88,7 @@ class PGVectorIndex(EmbeddingIndex): return QueryDocumentsResponse(chunks=chunks, scores=scores) -class PGVectorMemoryAdapter(Memory, RoutableProvider): +class PGVectorMemoryAdapter(Memory): def __init__(self, config: PGVectorConfig) -> None: print(f"Initializing PGVectorMemoryAdapter -> {config.host}:{config.port}") self.config = config @@ -144,14 +113,6 @@ class PGVectorMemoryAdapter(Memory, RoutableProvider): else: raise RuntimeError("Vector extension is not installed.") - self.cursor.execute( - """ - CREATE TABLE IF NOT EXISTS metadata_store ( - key TEXT PRIMARY KEY, - data JSONB - ) - """ - ) except Exception as e: import traceback @@ -161,51 +122,28 @@ class PGVectorMemoryAdapter(Memory, RoutableProvider): async def shutdown(self) -> None: pass - async def validate_routing_keys(self, routing_keys: List[str]) -> None: - print(f"[pgvector] Registering memory bank routing keys: {routing_keys}") - pass - - async def create_memory_bank( + async def register_memory_bank( self, - name: str, - config: MemoryBankConfig, - url: Optional[URL] = None, - ) -> MemoryBank: - bank_id = str(uuid.uuid4()) - bank = MemoryBank( - bank_id=bank_id, - name=name, - config=config, - url=url, - ) - upsert_models( - self.cursor, - [ - (bank.bank_id, bank), - ], - ) + memory_bank: MemoryBankDef, + ) -> None: + assert ( + memory_bank.type == MemoryBankType.vector.value + ), f"Only vector banks are supported {memory_bank.type}" + index = BankWithIndex( - bank=bank, - index=PGVectorIndex(bank, ALL_MINILM_L6_V2_DIMENSION, self.cursor), + bank=memory_bank, + index=PGVectorIndex(memory_bank, ALL_MINILM_L6_V2_DIMENSION, self.cursor), ) self.cache[bank_id] = index - return bank - - async def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]: - bank_index = await self._get_and_cache_bank_index(bank_id) - if bank_index is None: - return None - return bank_index.bank async def _get_and_cache_bank_index(self, bank_id: str) -> Optional[BankWithIndex]: if bank_id in self.cache: return self.cache[bank_id] - banks = load_models(self.cursor, [bank_id], MemoryBank) - if not banks: - return None + bank = await self.memory_bank_store.get_memory_bank(bank_id) + if not bank: + raise ValueError(f"Bank {bank_id} not 
found") - bank = banks[0] index = BankWithIndex( bank=bank, index=PGVectorIndex(bank, ALL_MINILM_L6_V2_DIMENSION, self.cursor), diff --git a/llama_stack/providers/adapters/memory/sample/sample.py b/llama_stack/providers/adapters/memory/sample/sample.py index 7ef4a625d..3431b87d5 100644 --- a/llama_stack/providers/adapters/memory/sample/sample.py +++ b/llama_stack/providers/adapters/memory/sample/sample.py @@ -9,14 +9,12 @@ from .config import SampleConfig from llama_stack.apis.memory import * # noqa: F403 -from llama_stack.distribution.datatypes import RoutableProvider - -class SampleMemoryImpl(Memory, RoutableProvider): +class SampleMemoryImpl(Memory): def __init__(self, config: SampleConfig): self.config = config - async def validate_routing_keys(self, routing_keys: list[str]) -> None: + async def register_memory_bank(self, memory_bank: MemoryBankDef) -> None: # these are the memory banks the Llama Stack will use to route requests to this provider # perform validation here if necessary pass diff --git a/llama_stack/providers/adapters/safety/bedrock/bedrock.py b/llama_stack/providers/adapters/safety/bedrock/bedrock.py index 814704e2c..7fbac2e4b 100644 --- a/llama_stack/providers/adapters/safety/bedrock/bedrock.py +++ b/llama_stack/providers/adapters/safety/bedrock/bedrock.py @@ -7,14 +7,12 @@ import json import logging -import traceback from typing import Any, Dict, List import boto3 from llama_stack.apis.safety import * # noqa from llama_models.llama3.api.datatypes import * # noqa: F403 -from llama_stack.distribution.datatypes import RoutableProvider from .config import BedrockSafetyConfig @@ -22,16 +20,17 @@ from .config import BedrockSafetyConfig logger = logging.getLogger(__name__) -SUPPORTED_SHIELD_TYPES = [ - "bedrock_guardrail", +BEDROCK_SUPPORTED_SHIELDS = [ + ShieldType.generic_content_shield.value, ] -class BedrockSafetyAdapter(Safety, RoutableProvider): +class BedrockSafetyAdapter(Safety): def __init__(self, config: BedrockSafetyConfig) -> None: if not config.aws_profile: raise ValueError(f"Missing boto_client aws_profile in model info::{config}") self.config = config + self.registered_shields = [] async def initialize(self) -> None: try: @@ -45,16 +44,27 @@ class BedrockSafetyAdapter(Safety, RoutableProvider): async def shutdown(self) -> None: pass - async def validate_routing_keys(self, routing_keys: List[str]) -> None: - for key in routing_keys: - if key not in SUPPORTED_SHIELD_TYPES: - raise ValueError(f"Unknown safety shield type: {key}") + async def register_shield(self, shield: ShieldDef) -> None: + if shield.type not in BEDROCK_SUPPORTED_SHIELDS: + raise ValueError(f"Unsupported safety shield type: {shield.type}") + + shield_params = shield.params + if "guardrailIdentifier" not in shield_params: + raise ValueError( + "Error running request for BedrockGaurdrails:Missing GuardrailID in request" + ) + + if "guardrailVersion" not in shield_params: + raise ValueError( + "Error running request for BedrockGaurdrails:Missing guardrailVersion in request" + ) async def run_shield( self, shield_type: str, messages: List[Message], params: Dict[str, Any] = None ) -> RunShieldResponse: - if shield_type not in SUPPORTED_SHIELD_TYPES: - raise ValueError(f"Unknown safety shield type: {shield_type}") + shield_def = await self.shield_store.get_shield(shield_type) + if not shield_def: + raise ValueError(f"Unknown shield {shield_type}") """This is the implementation for the bedrock guardrails. 
The input to the guardrails is to be of this format ```content = [ @@ -69,52 +79,38 @@ class BedrockSafetyAdapter(Safety, RoutableProvider): They contain content, role . For now we will extract the content and default the "qualifiers": ["query"] """ - try: - logger.debug(f"run_shield::{params}::messages={messages}") - if "guardrailIdentifier" not in params: - raise RuntimeError( - "Error running request for BedrockGaurdrails:Missing GuardrailID in request" - ) - if "guardrailVersion" not in params: - raise RuntimeError( - "Error running request for BedrockGaurdrails:Missing guardrailVersion in request" - ) + shield_params = shield_def.params + logger.debug(f"run_shield::{shield_params}::messages={messages}") - # - convert the messages into format Bedrock expects - content_messages = [] - for message in messages: - content_messages.append({"text": {"text": message.content}}) - logger.debug( - f"run_shield::final:messages::{json.dumps(content_messages, indent=2)}:" - ) + # - convert the messages into format Bedrock expects + content_messages = [] + for message in messages: + content_messages.append({"text": {"text": message.content}}) + logger.debug( + f"run_shield::final:messages::{json.dumps(content_messages, indent=2)}:" + ) - response = self.boto_client.apply_guardrail( - guardrailIdentifier=params.get("guardrailIdentifier"), - guardrailVersion=params.get("guardrailVersion"), - source="OUTPUT", # or 'INPUT' depending on your use case - content=content_messages, - ) - logger.debug(f"run_shield:: response: {response}::") - if response["action"] == "GUARDRAIL_INTERVENED": - user_message = "" - metadata = {} - for output in response["outputs"]: - # guardrails returns a list - however for this implementation we will leverage the last values - user_message = output["text"] - for assessment in response["assessments"]: - # guardrails returns a list - however for this implementation we will leverage the last values - metadata = dict(assessment) - return SafetyViolation( - user_message=user_message, - violation_level=ViolationLevel.ERROR, - metadata=metadata, - ) + response = self.boto_client.apply_guardrail( + guardrailIdentifier=shield_params["guardrailIdentifier"], + guardrailVersion=shield_params["guardrailVersion"], + source="OUTPUT", # or 'INPUT' depending on your use case + content=content_messages, + ) + if response["action"] == "GUARDRAIL_INTERVENED": + user_message = "" + metadata = {} + for output in response["outputs"]: + # guardrails returns a list - however for this implementation we will leverage the last values + user_message = output["text"] + for assessment in response["assessments"]: + # guardrails returns a list - however for this implementation we will leverage the last values + metadata = dict(assessment) - except Exception: - error_str = traceback.format_exc() - logger.error( - f"Error in apply_guardrails:{error_str}:: RETURNING None !!!!!" 
+ return SafetyViolation( + user_message=user_message, + violation_level=ViolationLevel.ERROR, + metadata=metadata, ) return None diff --git a/llama_stack/providers/adapters/safety/sample/sample.py b/llama_stack/providers/adapters/safety/sample/sample.py index a71f5143f..1aecf1ad0 100644 --- a/llama_stack/providers/adapters/safety/sample/sample.py +++ b/llama_stack/providers/adapters/safety/sample/sample.py @@ -9,14 +9,12 @@ from .config import SampleConfig from llama_stack.apis.safety import * # noqa: F403 -from llama_stack.distribution.datatypes import RoutableProvider - -class SampleSafetyImpl(Safety, RoutableProvider): +class SampleSafetyImpl(Safety): def __init__(self, config: SampleConfig): self.config = config - async def validate_routing_keys(self, routing_keys: list[str]) -> None: + async def register_shield(self, shield: ShieldDef) -> None: # these are the safety shields the Llama Stack will use to route requests to this provider # perform validation here if necessary pass diff --git a/llama_stack/providers/adapters/safety/together/together.py b/llama_stack/providers/adapters/safety/together/together.py index 9d9fa6a4e..fa6ec395d 100644 --- a/llama_stack/providers/adapters/safety/together/together.py +++ b/llama_stack/providers/adapters/safety/together/together.py @@ -12,7 +12,7 @@ from llama_stack.distribution.request_headers import NeedsRequestProviderData from .config import TogetherSafetyConfig -SAFETY_SHIELD_MODEL_MAP = { +TOGETHER_SHIELD_MODEL_MAP = { "llama_guard": "meta-llama/Meta-Llama-Guard-3-8B", "Llama-Guard-3-8B": "meta-llama/Meta-Llama-Guard-3-8B", "Llama-Guard-3-11B-Vision": "meta-llama/Llama-Guard-3-11B-Vision-Turbo", @@ -22,7 +22,6 @@ SAFETY_SHIELD_MODEL_MAP = { class TogetherSafetyImpl(Safety, NeedsRequestProviderData): def __init__(self, config: TogetherSafetyConfig) -> None: self.config = config - self.register_shields = [] async def initialize(self) -> None: pass @@ -34,26 +33,15 @@ class TogetherSafetyImpl(Safety, NeedsRequestProviderData): if shield.type != ShieldType.llama_guard.value: raise ValueError(f"Unsupported safety shield type: {shield.type}") - self.registered_shields.append(shield) - - async def list_shields(self) -> List[ShieldDef]: - return self.registered_shields - - async def get_shield(self, identifier: str) -> Optional[ShieldDef]: - for shield in self.registered_shields: - if shield.identifier == identifier: - return shield - return None - async def run_shield( self, shield_type: str, messages: List[Message], params: Dict[str, Any] = None ) -> RunShieldResponse: - shield_def = await self.get_shield(shield_type) + shield_def = await self.shield_store.get_shield(shield_type) if not shield_def: raise ValueError(f"Unknown shield {shield_type}") model = shield_def.params.get("model", "llama_guard") - if model not in SAFETY_SHIELD_MODEL_MAP: + if model not in TOGETHER_SHIELD_MODEL_MAP: raise ValueError(f"Unsupported safety model: {model}") together_api_key = None @@ -73,7 +61,9 @@ class TogetherSafetyImpl(Safety, NeedsRequestProviderData): if message.role in (Role.user.value, Role.assistant.value): api_messages.append({"role": message.role, "content": message.content}) - violation = await get_safety_response(together_api_key, model, api_messages) + violation = await get_safety_response( + together_api_key, TOGETHER_SHIELD_MODEL_MAP[model], api_messages + ) return RunShieldResponse(violation=violation) diff --git a/llama_stack/providers/impls/meta_reference/memory/faiss.py b/llama_stack/providers/impls/meta_reference/memory/faiss.py index 
1534971cd..7c59f5d59 100644 --- a/llama_stack/providers/impls/meta_reference/memory/faiss.py +++ b/llama_stack/providers/impls/meta_reference/memory/faiss.py @@ -83,15 +83,6 @@ class FaissMemoryImpl(Memory): ) self.cache[memory_bank.identifier] = index - async def get_memory_bank(self, identifier: str) -> Optional[MemoryBankDef]: - index = self.cache.get(identifier) - if index is None: - return None - return index.bank - - async def list_memory_banks(self) -> List[MemoryBankDef]: - return [x.bank for x in self.cache.values()] - async def insert_documents( self, bank_id: str, diff --git a/llama_stack/providers/impls/meta_reference/safety/safety.py b/llama_stack/providers/impls/meta_reference/safety/safety.py index 5154acd77..5d6747f9f 100644 --- a/llama_stack/providers/impls/meta_reference/safety/safety.py +++ b/llama_stack/providers/impls/meta_reference/safety/safety.py @@ -33,7 +33,6 @@ class MetaReferenceSafetyImpl(Safety): def __init__(self, config: SafetyConfig, deps) -> None: self.config = config self.inference_api = deps[Api.inference] - self.registered_shields = [] self.available_shields = [ShieldType.code_scanner.value] if config.llama_guard_shield: @@ -55,24 +54,13 @@ class MetaReferenceSafetyImpl(Safety): if shield.type not in self.available_shields: raise ValueError(f"Unsupported safety shield type: {shield.type}") - self.registered_shields.append(shield) - - async def list_shields(self) -> List[ShieldDef]: - return self.registered_shields - - async def get_shield(self, identifier: str) -> Optional[ShieldDef]: - for shield in self.registered_shields: - if shield.identifier == identifier: - return shield - return None - async def run_shield( self, shield_type: str, messages: List[Message], params: Dict[str, Any] = None, ) -> RunShieldResponse: - shield_def = await self.get_shield(shield_type) + shield_def = await self.shield_store.get_shield(shield_type) if not shield_def: raise ValueError(f"Unknown shield {shield_type}") diff --git a/llama_stack/providers/utils/inference/model_registry.py b/llama_stack/providers/utils/inference/model_registry.py index dabf698d4..744a89084 100644 --- a/llama_stack/providers/utils/inference/model_registry.py +++ b/llama_stack/providers/utils/inference/model_registry.py @@ -4,7 +4,7 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from typing import Dict, List +from typing import Dict from llama_models.sku_list import resolve_model @@ -15,7 +15,6 @@ class ModelRegistryHelper: def __init__(self, stack_to_provider_models_map: Dict[str, str]): self.stack_to_provider_models_map = stack_to_provider_models_map - self.registered_models = [] def map_to_provider_model(self, identifier: str) -> str: model = resolve_model(identifier) @@ -30,22 +29,7 @@ class ModelRegistryHelper: return self.stack_to_provider_models_map[identifier] async def register_model(self, model: ModelDef) -> None: - existing = await self.get_model(model.identifier) - if existing is not None: - return - if model.identifier not in self.stack_to_provider_models_map: raise ValueError( f"Unsupported model {model.identifier}. 
Supported models: {self.stack_to_provider_models_map.keys()}" ) - - self.registered_models.append(model) - - async def list_models(self) -> List[ModelDef]: - return self.registered_models - - async def get_model(self, identifier: str) -> Optional[ModelDef]: - for model in self.registered_models: - if model.identifier == identifier: - return model - return None From 1550187cd8a5883f62e23e43f7fbbc3a37981cd9 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sun, 6 Oct 2024 17:20:33 -0700 Subject: [PATCH 41/69] cleanup --- .../impls/meta_reference/inference/inference.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/llama_stack/providers/impls/meta_reference/inference/inference.py b/llama_stack/providers/impls/meta_reference/inference/inference.py index 9c6654ad1..f36d65c3f 100644 --- a/llama_stack/providers/impls/meta_reference/inference/inference.py +++ b/llama_stack/providers/impls/meta_reference/inference/inference.py @@ -31,7 +31,6 @@ class MetaReferenceInferenceImpl(Inference): if model is None: raise RuntimeError(f"Unknown model: {config.model}, Run `llama model list`") self.model = model - self.registered_model_defs = [] # verify that the checkpoint actually is for this model lol async def initialize(self) -> None: @@ -39,24 +38,10 @@ class MetaReferenceInferenceImpl(Inference): self.generator.start() async def register_model(self, model: ModelDef) -> None: - existing = await self.get_model(model.identifier) - if existing is not None: - return - if model.identifier != self.model.descriptor(): raise RuntimeError( f"Model mismatch: {model.identifier} != {self.model.descriptor()}" ) - self.registered_model_defs = [model] - - async def list_models(self) -> List[ModelDef]: - return self.registered_model_defs - - async def get_model(self, identifier: str) -> Optional[ModelDef]: - for model in self.registered_model_defs: - if model.identifier == identifier: - return model - return None async def shutdown(self) -> None: self.generator.stop() From 099a95b614413bced3bd0805e0a2bddf902818d0 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sun, 6 Oct 2024 18:02:47 -0700 Subject: [PATCH 42/69] slight upgrade to CLI --- llama_stack/cli/stack/build.py | 36 ++++++++++--------- llama_stack/cli/stack/configure.py | 4 +-- .../adapters/inference/tgi/config.py | 2 +- .../providers/adapters/inference/tgi/tgi.py | 2 +- 4 files changed, 23 insertions(+), 21 deletions(-) diff --git a/llama_stack/cli/stack/build.py b/llama_stack/cli/stack/build.py index 0cedbe901..f6821c8df 100644 --- a/llama_stack/cli/stack/build.py +++ b/llama_stack/cli/stack/build.py @@ -105,8 +105,7 @@ class StackBuild(Subcommand): import yaml - from llama_stack.distribution.build import ApiInput, build_image, ImageType - + from llama_stack.distribution.build import build_image, ImageType from llama_stack.distribution.utils.config_dirs import DISTRIBS_BASE_DIR from llama_stack.distribution.utils.serialize import EnumEncoder from termcolor import cprint @@ -175,9 +174,11 @@ class StackBuild(Subcommand): ) def _run_stack_build_command(self, args: argparse.Namespace) -> None: + import textwrap import yaml from llama_stack.distribution.distribution import get_provider_registry from prompt_toolkit import prompt + from prompt_toolkit.completion import WordCompleter from prompt_toolkit.validation import Validator from termcolor import cprint @@ -240,27 +241,30 @@ class StackBuild(Subcommand): default="conda", ) - cprint( - "\n Llama Stack is composed of several APIs working together. 
Let's configure the providers (implementations) you want to use for these APIs.", - color="green", - ) + cprint(textwrap.dedent( + """ + Llama Stack is composed of several APIs working together. Let's select + the provider types (implementations) you want to use for these APIs. + """, + ), + color="green") + + print("Tip: use to see options for the providers.\n") providers = dict() for api, providers_for_api in get_provider_registry().items(): + available_providers = [ + x for x in providers_for_api.keys() if x != "remote" + ] api_provider = prompt( - "> Enter provider for the {} API: (default=meta-reference): ".format( + "> Enter provider for API {}: ".format( api.value ), + completer=WordCompleter(available_providers), + complete_while_typing=True, validator=Validator.from_callable( - lambda x: x in providers_for_api, - error_message="Invalid provider, please enter one of the following: {}".format( - list(providers_for_api.keys()) - ), - ), - default=( - "meta-reference" - if "meta-reference" in providers_for_api - else list(providers_for_api.keys())[0] + lambda x: x in available_providers, + error_message="Invalid provider, use to see options", ), ) diff --git a/llama_stack/cli/stack/configure.py b/llama_stack/cli/stack/configure.py index 9aa7e2f6e..13899715b 100644 --- a/llama_stack/cli/stack/configure.py +++ b/llama_stack/cli/stack/configure.py @@ -71,9 +71,7 @@ class StackConfigure(Subcommand): conda_dir = ( Path(os.path.expanduser("~/.conda/envs")) / f"llamastack-{args.config}" ) - output = subprocess.check_output( - ["bash", "-c", "conda info --json -a"] - ) + output = subprocess.check_output(["bash", "-c", "conda info --json"]) conda_envs = json.loads(output.decode("utf-8"))["envs"] for x in conda_envs: diff --git a/llama_stack/providers/adapters/inference/tgi/config.py b/llama_stack/providers/adapters/inference/tgi/config.py index 233205066..6ce2b9dc6 100644 --- a/llama_stack/providers/adapters/inference/tgi/config.py +++ b/llama_stack/providers/adapters/inference/tgi/config.py @@ -34,7 +34,7 @@ class InferenceEndpointImplConfig(BaseModel): @json_schema_type class InferenceAPIImplConfig(BaseModel): - model_id: str = Field( + huggingface_repo: str = Field( description="The model ID of the model on the Hugging Face Hub (e.g. 
'meta-llama/Meta-Llama-3.1-70B-Instruct')", ) api_token: Optional[str] = Field( diff --git a/llama_stack/providers/adapters/inference/tgi/tgi.py b/llama_stack/providers/adapters/inference/tgi/tgi.py index 538c11ec7..24b664068 100644 --- a/llama_stack/providers/adapters/inference/tgi/tgi.py +++ b/llama_stack/providers/adapters/inference/tgi/tgi.py @@ -243,7 +243,7 @@ class TGIAdapter(_HfAdapter): class InferenceAPIAdapter(_HfAdapter): async def initialize(self, config: InferenceAPIImplConfig) -> None: self.client = AsyncInferenceClient( - model=config.model_id, token=config.api_token + model=config.huggingface_repo, token=config.api_token ) endpoint_info = await self.client.get_endpoint_info() self.max_tokens = endpoint_info["max_total_tokens"] From 3725e74906a830dbc121cfc7f235eef86a7e6913 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sun, 6 Oct 2024 22:00:54 -0700 Subject: [PATCH 43/69] memory bank registration fixes --- llama_stack/apis/inspect/inspect.py | 4 +- llama_stack/apis/memory_banks/client.py | 62 +++++++++++++++---- llama_stack/cli/stack/build.py | 14 ++--- llama_stack/distribution/configure.py | 7 ++- llama_stack/distribution/inspect.py | 30 +++++---- llama_stack/distribution/resolver.py | 12 +++- llama_stack/distribution/routers/routers.py | 22 +------ .../distribution/routers/routing_tables.py | 19 ++++-- 8 files changed, 108 insertions(+), 62 deletions(-) diff --git a/llama_stack/apis/inspect/inspect.py b/llama_stack/apis/inspect/inspect.py index ca444098c..a30f39a16 100644 --- a/llama_stack/apis/inspect/inspect.py +++ b/llama_stack/apis/inspect/inspect.py @@ -12,15 +12,15 @@ from pydantic import BaseModel @json_schema_type class ProviderInfo(BaseModel): + provider_id: str provider_type: str - description: str @json_schema_type class RouteInfo(BaseModel): route: str method: str - providers: List[str] + provider_types: List[str] @json_schema_type diff --git a/llama_stack/apis/memory_banks/client.py b/llama_stack/apis/memory_banks/client.py index 78a991374..3b763d1f3 100644 --- a/llama_stack/apis/memory_banks/client.py +++ b/llama_stack/apis/memory_banks/client.py @@ -5,8 +5,9 @@ # the root directory of this source tree. 
import asyncio +import json -from typing import List, Optional +from typing import Any, Dict, List, Optional import fire import httpx @@ -15,6 +16,25 @@ from termcolor import cprint from .memory_banks import * # noqa: F403 +def deserialize_memory_bank_def(j: Optional[Dict[str, Any]]) -> MemoryBankDef: + if j is None: + return None + + if "type" not in j: + raise ValueError("Memory bank type not specified") + type = j["type"] + if type == MemoryBankType.vector.value: + return VectorMemoryBankDef(**j) + elif type == MemoryBankType.keyvalue.value: + return KeyValueMemoryBankDef(**j) + elif type == MemoryBankType.keyword.value: + return KeywordMemoryBankDef(**j) + elif type == MemoryBankType.graph.value: + return GraphMemoryBankDef(**j) + else: + raise ValueError(f"Unknown memory bank type: {type}") + + class MemoryBanksClient(MemoryBanks): def __init__(self, base_url: str): self.base_url = base_url @@ -25,37 +45,57 @@ class MemoryBanksClient(MemoryBanks): async def shutdown(self) -> None: pass - async def list_available_memory_banks(self) -> List[MemoryBankSpec]: + async def list_memory_banks(self) -> List[MemoryBankDef]: async with httpx.AsyncClient() as client: response = await client.get( f"{self.base_url}/memory_banks/list", headers={"Content-Type": "application/json"}, ) response.raise_for_status() - return [MemoryBankSpec(**x) for x in response.json()] + return [deserialize_memory_bank_def(x) for x in response.json()] - async def get_serving_memory_bank( - self, bank_type: MemoryBankType - ) -> Optional[MemoryBankSpec]: + async def get_memory_bank( + self, + identifier: str, + ) -> Optional[MemoryBankDef]: async with httpx.AsyncClient() as client: response = await client.get( f"{self.base_url}/memory_banks/get", params={ - "bank_type": bank_type.value, + "identifier": identifier, }, headers={"Content-Type": "application/json"}, ) response.raise_for_status() j = response.json() - if j is None: - return None - return MemoryBankSpec(**j) + return deserialize_memory_bank_def(j) + + async def register_memory_bank(self, memory_bank: MemoryBankDef) -> None: + async with httpx.AsyncClient() as client: + response = await client.post( + f"{self.base_url}/memory/register_memory_bank", + json={ + "memory_bank": json.loads(memory_bank.json()), + }, + headers={"Content-Type": "application/json"}, + ) + response.raise_for_status() async def run_main(host: str, port: int, stream: bool): client = MemoryBanksClient(f"http://{host}:{port}") - response = await client.list_available_memory_banks() + await client.register_memory_bank( + VectorMemoryBankDef( + identifier="test_bank", + provider_id="", + embedding_model="all-MiniLM-L6-v2", + chunk_size_in_tokens=512, + overlap_size_in_tokens=64, + ), + ) + + response = await client.list_memory_banks() cprint(f"list_memory_banks response={response}", "green") diff --git a/llama_stack/cli/stack/build.py b/llama_stack/cli/stack/build.py index f6821c8df..f07a0f873 100644 --- a/llama_stack/cli/stack/build.py +++ b/llama_stack/cli/stack/build.py @@ -241,13 +241,15 @@ class StackBuild(Subcommand): default="conda", ) - cprint(textwrap.dedent( - """ + cprint( + textwrap.dedent( + """ Llama Stack is composed of several APIs working together. Let's select the provider types (implementations) you want to use for these APIs. 
""", - ), - color="green") + ), + color="green", + ) print("Tip: use to see options for the providers.\n") @@ -257,9 +259,7 @@ class StackBuild(Subcommand): x for x in providers_for_api.keys() if x != "remote" ] api_provider = prompt( - "> Enter provider for API {}: ".format( - api.value - ), + "> Enter provider for API {}: ".format(api.value), completer=WordCompleter(available_providers), complete_while_typing=True, validator=Validator.from_callable( diff --git a/llama_stack/distribution/configure.py b/llama_stack/distribution/configure.py index f343c13bb..12f225af2 100644 --- a/llama_stack/distribution/configure.py +++ b/llama_stack/distribution/configure.py @@ -64,8 +64,8 @@ def configure_api_providers( ) -> StackRunConfig: is_nux = len(config.providers) == 0 - apis = set((config.apis or list(build_spec.providers.keys()))) - config.apis = [a for a in apis if a != "telemetry"] + # keep this default so all APIs are served + config.apis = [] if is_nux: print( @@ -79,7 +79,8 @@ def configure_api_providers( provider_registry = get_provider_registry() builtin_apis = [a.routing_table_api for a in builtin_automatically_routed_apis()] - for api_str in config.apis: + apis_to_serve = [a.value for a in Api if a not in (Api.telemetry, Api.inspect)] + for api_str in apis_to_serve: api = Api(api_str) if api in builtin_apis: continue diff --git a/llama_stack/distribution/inspect.py b/llama_stack/distribution/inspect.py index 9963fffd8..f5716ef5e 100644 --- a/llama_stack/distribution/inspect.py +++ b/llama_stack/distribution/inspect.py @@ -8,52 +8,56 @@ from typing import Dict, List from llama_stack.apis.inspect import * # noqa: F403 from pydantic import BaseModel -from llama_stack.distribution.distribution import get_provider_registry from llama_stack.distribution.server.endpoints import get_all_api_endpoints from llama_stack.providers.datatypes import * # noqa: F403 +from llama_stack.distribution.datatypes import * # noqa: F403 class DistributionInspectConfig(BaseModel): - pass + run_config: StackRunConfig -async def get_provider_impl(*args, **kwargs): - impl = DistributionInspectImpl() +async def get_provider_impl(config, deps): + impl = DistributionInspectImpl(config, deps) await impl.initialize() return impl class DistributionInspectImpl(Inspect): - def __init__(self): - pass + def __init__(self, config, deps): + self.config = config + self.deps = deps async def initialize(self) -> None: pass async def list_providers(self) -> Dict[str, List[ProviderInfo]]: + run_config = self.config.run_config + ret = {} - all_providers = get_provider_registry() - for api, providers in all_providers.items(): - ret[api.value] = [ + for api, providers in run_config.providers.items(): + ret[api] = [ ProviderInfo( + provider_id=p.provider_id, provider_type=p.provider_type, - description="Passthrough" if is_passthrough(p) else "", ) - for p in providers.values() + for p in providers ] return ret async def list_routes(self) -> Dict[str, List[RouteInfo]]: + run_config = self.config.run_config + ret = {} all_endpoints = get_all_api_endpoints() - for api, endpoints in all_endpoints.items(): + providers = run_config.providers.get(api.value, []) ret[api.value] = [ RouteInfo( route=e.route, method=e.method, - providers=[], + provider_types=[p.provider_type for p in providers], ) for e in endpoints ] diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index 0adb42915..0fc9bd72e 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -60,8 +60,11 @@ 
async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An providers_with_specs[key] = specs apis_to_serve = run_config.apis or set( - list(providers_with_specs.keys()) + list(routing_table_apis) + list(providers_with_specs.keys()) + + [x.value for x in routing_table_apis] + + [x.value for x in router_apis] ) + print(f"{apis_to_serve=}") for info in builtin_automatically_routed_apis(): if info.router_api.value not in apis_to_serve: @@ -112,18 +115,22 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An sorted_providers = topological_sort( {k: v.values() for k, v in providers_with_specs.items()} ) + apis = [x[1].spec.api for x in sorted_providers] sorted_providers.append( ( "inspect", ProviderWithSpec( provider_id="__builtin__", provider_type="__builtin__", - config={}, + config={ + "run_config": run_config.dict(), + }, spec=InlineProviderSpec( api=Api.inspect, provider_type="__builtin__", config_class="llama_stack.distribution.inspect.DistributionInspectConfig", module="llama_stack.distribution.inspect", + api_dependencies=apis, ), ), ) @@ -233,6 +240,7 @@ async def instantiate_provider( fn = getattr(module, method) impl = await fn(*args) + impl.__provider_id__ = provider.provider_id impl.__provider_spec__ = provider_spec impl.__provider_config__ = config return impl diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py index c56b33f21..361cee3f3 100644 --- a/llama_stack/distribution/routers/routers.py +++ b/llama_stack/distribution/routers/routers.py @@ -28,14 +28,8 @@ class MemoryRouter(Memory): async def shutdown(self) -> None: pass - async def list_memory_banks(self) -> List[MemoryBankDef]: - return self.routing_table.list_memory_banks() - - async def get_memory_bank(self, identifier: str) -> Optional[MemoryBankDef]: - return self.routing_table.get_memory_bank(identifier) - - async def register_memory_bank(self, bank: MemoryBankDef) -> None: - await self.routing_table.register_memory_bank(bank) + async def register_memory_bank(self, memory_bank: MemoryBankDef) -> None: + await self.routing_table.register_memory_bank(memory_bank) async def insert_documents( self, @@ -73,12 +67,6 @@ class InferenceRouter(Inference): async def shutdown(self) -> None: pass - async def list_models(self) -> List[ModelDef]: - return self.routing_table.list_models() - - async def get_model(self, identifier: str) -> Optional[ModelDef]: - return self.routing_table.get_model(identifier) - async def register_model(self, model: ModelDef) -> None: await self.routing_table.register_model(model) @@ -149,12 +137,6 @@ class SafetyRouter(Safety): async def shutdown(self) -> None: pass - async def list_shields(self) -> List[ShieldDef]: - return self.routing_table.list_shields() - - async def get_shield(self, shield_type: str) -> Optional[ShieldDef]: - return self.routing_table.get_shield(shield_type) - async def register_shield(self, shield: ShieldDef) -> None: await self.routing_table.register_shield(shield) diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index ef38b6391..3d89aa19f 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -80,12 +80,21 @@ class CommonRoutingTableImpl(RoutingTable): return obj return None - async def register_object(self, obj: RoutableObject) -> Any: + async def register_object(self, obj: RoutableObject): if obj.identifier in self.routing_key_to_object: - 
raise ValueError(f"Object `{obj.identifier}` already registered") + print(f"Object `{obj.identifier}` is already registered") + return - if obj.provider_id not in self.impls_by_provider_id: - raise ValueError(f"Provider `{obj.provider_id}` not found") + if not obj.provider_id: + provider_ids = list(self.impls_by_provider_id.keys()) + if not provider_ids: + raise ValueError("No providers found") + + print(f"Picking provider `{provider_ids[0]}` for {obj.identifier}") + obj.provider_id = provider_ids[0] + else: + if obj.provider_id not in self.impls_by_provider_id: + raise ValueError(f"Provider `{obj.provider_id}` not found") p = self.impls_by_provider_id[obj.provider_id] await register_object_with_provider(obj, p) @@ -93,6 +102,8 @@ class CommonRoutingTableImpl(RoutingTable): self.routing_key_to_object[obj.identifier] = obj self.registry.append(obj) + # TODO: persist this to a store + class ModelsRoutingTable(CommonRoutingTableImpl, Models): async def list_models(self) -> List[ModelDef]: From 862f8ddb8dfa1e9b0d2ee3b44726f50374cabbe2 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sun, 6 Oct 2024 22:10:24 -0700 Subject: [PATCH 44/69] more memory related fixes; memory.client now works --- llama_stack/apis/memory/client.py | 68 ++++++------------- llama_stack/apis/memory_banks/client.py | 22 ------ .../providers/utils/memory/vector_store.py | 10 +-- 3 files changed, 24 insertions(+), 76 deletions(-) diff --git a/llama_stack/apis/memory/client.py b/llama_stack/apis/memory/client.py index 04c2dab5b..89f7cac99 100644 --- a/llama_stack/apis/memory/client.py +++ b/llama_stack/apis/memory/client.py @@ -13,11 +13,11 @@ from typing import Any, Dict, List, Optional import fire import httpx -from termcolor import cprint from llama_stack.distribution.datatypes import RemoteProviderConfig from llama_stack.apis.memory import * # noqa: F403 +from llama_stack.apis.memory_banks.client import MemoryBanksClient from llama_stack.providers.utils.memory.file_utils import data_url_from_file @@ -35,44 +35,16 @@ class MemoryClient(Memory): async def shutdown(self) -> None: pass - async def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]: + async def register_memory_bank(self, memory_bank: MemoryBankDef) -> None: async with httpx.AsyncClient() as client: - r = await client.get( - f"{self.base_url}/memory/get", - params={ - "bank_id": bank_id, - }, - headers={"Content-Type": "application/json"}, - timeout=20, - ) - r.raise_for_status() - d = r.json() - if not d: - return None - return MemoryBank(**d) - - async def create_memory_bank( - self, - name: str, - config: MemoryBankConfig, - url: Optional[URL] = None, - ) -> MemoryBank: - async with httpx.AsyncClient() as client: - r = await client.post( - f"{self.base_url}/memory/create", + response = await client.post( + f"{self.base_url}/memory/register_memory_bank", json={ - "name": name, - "config": config.dict(), - "url": url, + "memory_bank": json.loads(memory_bank.json()), }, headers={"Content-Type": "application/json"}, - timeout=20, ) - r.raise_for_status() - d = r.json() - if not d: - return None - return MemoryBank(**d) + response.raise_for_status() async def insert_documents( self, @@ -114,22 +86,20 @@ class MemoryClient(Memory): async def run_main(host: str, port: int, stream: bool): client = MemoryClient(f"http://{host}:{port}") + banks_client = MemoryBanksClient(f"http://{host}:{port}") - # create a memory bank - bank = await client.create_memory_bank( - name="test_bank", - config=VectorMemoryBankConfig( - bank_id="test_bank", - 
embedding_model="all-MiniLM-L6-v2", - chunk_size_in_tokens=512, - overlap_size_in_tokens=64, - ), + bank = VectorMemoryBankDef( + identifier="test_bank", + provider_id="", + embedding_model="all-MiniLM-L6-v2", + chunk_size_in_tokens=512, + overlap_size_in_tokens=64, ) - cprint(json.dumps(bank.dict(), indent=4), "green") + await client.register_memory_bank(bank) - retrieved_bank = await client.get_memory_bank(bank.bank_id) + retrieved_bank = await banks_client.get_memory_bank(bank.identifier) assert retrieved_bank is not None - assert retrieved_bank.config.embedding_model == "all-MiniLM-L6-v2" + assert retrieved_bank.embedding_model == "all-MiniLM-L6-v2" urls = [ "memory_optimizations.rst", @@ -162,13 +132,13 @@ async def run_main(host: str, port: int, stream: bool): # insert some documents await client.insert_documents( - bank_id=bank.bank_id, + bank_id=bank.identifier, documents=documents, ) # query the documents response = await client.query_documents( - bank_id=bank.bank_id, + bank_id=bank.identifier, query=[ "How do I use Lora?", ], @@ -178,7 +148,7 @@ async def run_main(host: str, port: int, stream: bool): print(f"Chunk:\n========\n{chunk}\n========\n") response = await client.query_documents( - bank_id=bank.bank_id, + bank_id=bank.identifier, query=[ "Tell me more about llama3 and torchtune", ], diff --git a/llama_stack/apis/memory_banks/client.py b/llama_stack/apis/memory_banks/client.py index 3b763d1f3..6a6e28133 100644 --- a/llama_stack/apis/memory_banks/client.py +++ b/llama_stack/apis/memory_banks/client.py @@ -5,7 +5,6 @@ # the root directory of this source tree. import asyncio -import json from typing import Any, Dict, List, Optional @@ -70,31 +69,10 @@ class MemoryBanksClient(MemoryBanks): j = response.json() return deserialize_memory_bank_def(j) - async def register_memory_bank(self, memory_bank: MemoryBankDef) -> None: - async with httpx.AsyncClient() as client: - response = await client.post( - f"{self.base_url}/memory/register_memory_bank", - json={ - "memory_bank": json.loads(memory_bank.json()), - }, - headers={"Content-Type": "application/json"}, - ) - response.raise_for_status() - async def run_main(host: str, port: int, stream: bool): client = MemoryBanksClient(f"http://{host}:{port}") - await client.register_memory_bank( - VectorMemoryBankDef( - identifier="test_bank", - provider_id="", - embedding_model="all-MiniLM-L6-v2", - chunk_size_in_tokens=512, - overlap_size_in_tokens=64, - ), - ) - response = await client.list_memory_banks() cprint(f"list_memory_banks response={response}", "green") diff --git a/llama_stack/providers/utils/memory/vector_store.py b/llama_stack/providers/utils/memory/vector_store.py index 0540cdf60..d0a7aed54 100644 --- a/llama_stack/providers/utils/memory/vector_store.py +++ b/llama_stack/providers/utils/memory/vector_store.py @@ -153,15 +153,15 @@ class BankWithIndex: self, documents: List[MemoryBankDocument], ) -> None: - model = get_embedding_model(self.bank.config.embedding_model) + model = get_embedding_model(self.bank.embedding_model) for doc in documents: content = await content_from_doc(doc) chunks = make_overlapped_chunks( doc.document_id, content, - self.bank.config.chunk_size_in_tokens, - self.bank.config.overlap_size_in_tokens - or (self.bank.config.chunk_size_in_tokens // 4), + self.bank.chunk_size_in_tokens, + self.bank.overlap_size_in_tokens + or (self.bank.chunk_size_in_tokens // 4), ) if not chunks: continue @@ -189,6 +189,6 @@ class BankWithIndex: else: query_str = _process(query) - model = 
get_embedding_model(self.bank.config.embedding_model) + model = get_embedding_model(self.bank.embedding_model) query_vector = model.encode([query_str])[0].astype(np.float32) return await self.index.query(query_vector, k) From 118c0ef1055f3627cf314f5a5110598c2a95065d Mon Sep 17 00:00:00 2001 From: Zain Hasan Date: Mon, 7 Oct 2024 01:21:50 -0400 Subject: [PATCH 45/69] Partial cleanup of weaviate --- llama_stack/distribution/datatypes.py | 8 - llama_stack/distribution/resolver.py | 1 - .../adapters/memory/weaviate/config.py | 10 +- .../adapters/memory/weaviate/weaviate.py | 166 ++++++++---------- 4 files changed, 82 insertions(+), 103 deletions(-) diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index f08eec462..c987d4c87 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -39,14 +39,6 @@ RoutedProtocol = Union[ ] -class ModelRegistry(Protocol): - def get_model(self, identifier: str) -> ModelDef: ... - - -class MemoryBankRegistry(Protocol): - def get_memory_bank(self, identifier: str) -> MemoryBankDef: ... - - # Example: /inference, /safety class AutoRoutedProviderSpec(ProviderSpec): provider_type: str = "router" diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index 0fc9bd72e..2d3679177 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -64,7 +64,6 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An + [x.value for x in routing_table_apis] + [x.value for x in router_apis] ) - print(f"{apis_to_serve=}") for info in builtin_automatically_routed_apis(): if info.router_api.value not in apis_to_serve: diff --git a/llama_stack/providers/adapters/memory/weaviate/config.py b/llama_stack/providers/adapters/memory/weaviate/config.py index db73604d2..d0811acb4 100644 --- a/llama_stack/providers/adapters/memory/weaviate/config.py +++ b/llama_stack/providers/adapters/memory/weaviate/config.py @@ -4,15 +4,13 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_models.schema_utils import json_schema_type -from pydantic import BaseModel, Field +from pydantic import BaseModel + class WeaviateRequestProviderData(BaseModel): - # if there _is_ provider data, it must specify the API KEY - # if you want it to be optional, use Optional[str] weaviate_api_key: str weaviate_cluster_url: str -@json_schema_type + class WeaviateConfig(BaseModel): - collection: str = Field(default="MemoryBank") + pass diff --git a/llama_stack/providers/adapters/memory/weaviate/weaviate.py b/llama_stack/providers/adapters/memory/weaviate/weaviate.py index abfe27150..9f8e93434 100644 --- a/llama_stack/providers/adapters/memory/weaviate/weaviate.py +++ b/llama_stack/providers/adapters/memory/weaviate/weaviate.py @@ -1,14 +1,19 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ import json -import uuid -from typing import List, Optional, Dict, Any -from numpy.typing import NDArray +from typing import Any, Dict, List, Optional import weaviate import weaviate.classes as wvc +from numpy.typing import NDArray from weaviate.classes.init import Auth -from llama_stack.apis.memory import * -from llama_stack.distribution.request_headers import get_request_provider_data +from llama_stack.apis.memory import * # noqa: F403 +from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.providers.utils.memory.vector_store import ( BankWithIndex, EmbeddingIndex, @@ -16,40 +21,43 @@ from llama_stack.providers.utils.memory.vector_store import ( from .config import WeaviateConfig, WeaviateRequestProviderData + class WeaviateIndex(EmbeddingIndex): def __init__(self, client: weaviate.Client, collection: str): self.client = client self.collection = collection async def add_chunks(self, chunks: List[Chunk], embeddings: NDArray): - assert len(chunks) == len(embeddings), f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}" + assert len(chunks) == len( + embeddings + ), f"Chunk length {len(chunks)} does not match embedding length {len(embeddings)}" data_objects = [] for i, chunk in enumerate(chunks): - - data_objects.append(wvc.data.DataObject( - properties={ - "chunk_content": chunk, - }, - vector = embeddings[i].tolist() - )) + data_objects.append( + wvc.data.DataObject( + properties={ + "chunk_content": chunk, + }, + vector=embeddings[i].tolist(), + ) + ) # Inserting chunks into a prespecified Weaviate collection assert self.collection is not None, "Collection name must be specified" my_collection = self.client.collections.get(self.collection) - - await my_collection.data.insert_many(data_objects) + await my_collection.data.insert_many(data_objects) async def query(self, embedding: NDArray, k: int) -> QueryDocumentsResponse: assert self.collection is not None, "Collection name must be specified" my_collection = self.client.collections.get(self.collection) - + results = my_collection.query.near_vector( - near_vector = embedding.tolist(), - limit = k, - return_meta_data = wvc.query.MetadataQuery(distance=True) + near_vector=embedding.tolist(), + limit=k, + return_meta_data=wvc.query.MetadataQuery(distance=True), ) chunks = [] @@ -59,102 +67,84 @@ class WeaviateIndex(EmbeddingIndex): chunk = doc.properties["chunk_content"] chunks.append(chunk) scores.append(1.0 / doc.metadata.distance) - + except Exception as e: import traceback + traceback.print_exc() print(f"Failed to parse document: {e}") return QueryDocumentsResponse(chunks=chunks, scores=scores) -class WeaviateMemoryAdapter(Memory): +class WeaviateMemoryAdapter(Memory, NeedsRequestProviderData): def __init__(self, config: WeaviateConfig) -> None: self.config = config - self.client = None + self.client_cache = {} self.cache = {} def _get_client(self) -> weaviate.Client: - request_provider_data = get_request_provider_data() - - if request_provider_data is not None: - assert isinstance(request_provider_data, WeaviateRequestProviderData) - - # Connect to Weaviate Cloud - return weaviate.connect_to_weaviate_cloud( - cluster_url = request_provider_data.weaviate_cluster_url, - auth_credentials = Auth.api_key(request_provider_data.weaviate_api_key), - ) + provider_data = self.get_request_provider_data() + assert provider_data is not None, "Request provider data must be set" + assert isinstance(provider_data, WeaviateRequestProviderData) + + key = 
f"{provider_data.weaviate_cluster_url}::{provider_data.weaviate_api_key}" + if key in self.client_cache: + return self.client_cache[key] + + client = weaviate.connect_to_weaviate_cloud( + cluster_url=provider_data.weaviate_cluster_url, + auth_credentials=Auth.api_key(provider_data.weaviate_api_key), + ) + self.client_cache[key] = client + return client async def initialize(self) -> None: - try: - self.client = self._get_client() - - # Create collection if it doesn't exist - if not self.client.collections.exists(self.config.collection): - self.client.collections.create( - name = self.config.collection, - vectorizer_config = wvc.config.Configure.Vectorizer.none(), - properties=[ - wvc.config.Property( - name="chunk_content", - data_type=wvc.config.DataType.TEXT, - ), - ] - ) - - except Exception as e: - import traceback - traceback.print_exc() - raise RuntimeError("Could not connect to Weaviate server") from e + pass async def shutdown(self) -> None: - self.client = self._get_client() + for client in self.client_cache.values(): + client.close() - if self.client: - self.client.close() - - async def create_memory_bank( + async def register_memory_bank( self, - name: str, - config: MemoryBankConfig, - url: Optional[URL] = None, - ) -> MemoryBank: - bank_id = str(uuid.uuid4()) - bank = MemoryBank( - bank_id=bank_id, - name=name, - config=config, - url=url, - ) - self.client = self._get_client() - - # Store the bank as a new collection in Weaviate - self.client.collections.create( - name=bank_id - ) + memory_bank: MemoryBankDef, + ) -> None: + assert ( + memory_bank.type == MemoryBankType.vector.value + ), f"Only vector banks are supported {memory_bank.type}" + + client = await self._get_client() + + # Create collection if it doesn't exist + if not client.collections.exists(memory_bank.identifier): + client.collections.create( + name=smemory_bank.identifier, + vectorizer_config=wvc.config.Configure.Vectorizer.none(), + properties=[ + wvc.config.Property( + name="chunk_content", + data_type=wvc.config.DataType.TEXT, + ), + ], + ) index = BankWithIndex( - bank=bank, - index=WeaviateIndex(cleint = self.client, collection = bank_id), + bank=memory_bank, + index=WeaviateIndex(client=client, collection=memory_bank.identifier), ) self.cache[bank_id] = index - return bank - - async def get_memory_bank(self, bank_id: str) -> Optional[MemoryBank]: - bank_index = await self._get_and_cache_bank_index(bank_id) - if bank_index is None: - return None - return bank_index.bank async def _get_and_cache_bank_index(self, bank_id: str) -> Optional[BankWithIndex]: - - self.client = self._get_client() - if bank_id in self.cache: return self.cache[bank_id] - collections = await self.client.collections.list_all().keys() + bank = await self.memory_bank_store.get_memory_bank(bank_id) + if not bank: + raise ValueError(f"Bank {bank_id} not found") + + client = await self._get_client() + collections = await client.collections.list_all().keys() for collection in collections: if collection == bank_id: @@ -189,4 +179,4 @@ class WeaviateMemoryAdapter(Memory): if not index: raise ValueError(f"Bank {bank_id} not found") - return await index.query_documents(query, params) \ No newline at end of file + return await index.query_documents(query, params) From a05599c67aeeb3466dbb18b529256c0468c6fcfe Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sun, 6 Oct 2024 22:50:34 -0700 Subject: [PATCH 46/69] Weaviate "should" work (i.e., is code-complete) but not tested --- .../adapters/memory/weaviate/weaviate.py | 52 +++++++------------ 1 
file changed, 18 insertions(+), 34 deletions(-) diff --git a/llama_stack/providers/adapters/memory/weaviate/weaviate.py b/llama_stack/providers/adapters/memory/weaviate/weaviate.py index 9f8e93434..573802c84 100644 --- a/llama_stack/providers/adapters/memory/weaviate/weaviate.py +++ b/llama_stack/providers/adapters/memory/weaviate/weaviate.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import json from typing import Any, Dict, List, Optional import weaviate @@ -23,9 +22,9 @@ from .config import WeaviateConfig, WeaviateRequestProviderData class WeaviateIndex(EmbeddingIndex): - def __init__(self, client: weaviate.Client, collection: str): + def __init__(self, client: weaviate.Client, collection_name: str): self.client = client - self.collection = collection + self.collection_name = collection_name async def add_chunks(self, chunks: List[Chunk], embeddings: NDArray): assert len(chunks) == len( @@ -44,17 +43,13 @@ class WeaviateIndex(EmbeddingIndex): ) # Inserting chunks into a prespecified Weaviate collection - assert self.collection is not None, "Collection name must be specified" - my_collection = self.client.collections.get(self.collection) - - await my_collection.data.insert_many(data_objects) + collection = self.client.collections.get(self.collection_name) + await collection.data.insert_many(data_objects) async def query(self, embedding: NDArray, k: int) -> QueryDocumentsResponse: - assert self.collection is not None, "Collection name must be specified" + collection = self.client.collections.get(self.collection_name) - my_collection = self.client.collections.get(self.collection) - - results = my_collection.query.near_vector( + results = collection.query.near_vector( near_vector=embedding.tolist(), limit=k, return_meta_data=wvc.query.MetadataQuery(distance=True), @@ -63,16 +58,9 @@ class WeaviateIndex(EmbeddingIndex): chunks = [] scores = [] for doc in results.objects: - try: - chunk = doc.properties["chunk_content"] - chunks.append(chunk) - scores.append(1.0 / doc.metadata.distance) - - except Exception as e: - import traceback - - traceback.print_exc() - print(f"Failed to parse document: {e}") + chunk = doc.properties["chunk_content"] + chunks.append(chunk) + scores.append(1.0 / doc.metadata.distance) return QueryDocumentsResponse(chunks=chunks, scores=scores) @@ -131,7 +119,7 @@ class WeaviateMemoryAdapter(Memory, NeedsRequestProviderData): index = BankWithIndex( bank=memory_bank, - index=WeaviateIndex(client=client, collection=memory_bank.identifier), + index=WeaviateIndex(client=client, collection_name=memory_bank.identifier), ) self.cache[bank_id] = index @@ -144,19 +132,15 @@ class WeaviateMemoryAdapter(Memory, NeedsRequestProviderData): raise ValueError(f"Bank {bank_id} not found") client = await self._get_client() - collections = await client.collections.list_all().keys() + if not client.collections.exists(bank_id): + raise ValueError(f"Collection with name `{bank_id}` not found") - for collection in collections: - if collection == bank_id: - bank = MemoryBank(**json.loads(collection.metadata["bank"])) - index = BankWithIndex( - bank=bank, - index=WeaviateIndex(self.client, collection), - ) - self.cache[bank_id] = index - return index - - return None + index = BankWithIndex( + bank=bank, + index=WeaviateIndex(client=client, collection_name=bank_id), + ) + self.cache[bank_id] = index + return index async def insert_documents( self, From 353c7dc82a0e12cbffe6a29e38858cc789ed8c1c Mon Sep 17 
00:00:00 2001 From: Ashwin Bharambe Date: Mon, 7 Oct 2024 13:55:01 -0700 Subject: [PATCH 47/69] A few bug fixes for covering corner cases --- llama_stack/cli/stack/build.py | 2 +- llama_stack/cli/stack/configure.py | 2 +- llama_stack/cli/stack/run.py | 6 ++-- llama_stack/distribution/configure.py | 43 ++++++++++++++++----------- 4 files changed, 30 insertions(+), 23 deletions(-) diff --git a/llama_stack/cli/stack/build.py b/llama_stack/cli/stack/build.py index f07a0f873..3fe615e6e 100644 --- a/llama_stack/cli/stack/build.py +++ b/llama_stack/cli/stack/build.py @@ -22,7 +22,7 @@ def available_templates_specs() -> List[BuildConfig]: import yaml template_specs = [] - for p in TEMPLATES_PATH.rglob("*.yaml"): + for p in TEMPLATES_PATH.rglob("*build.yaml"): with open(p, "r") as f: build_config = BuildConfig(**yaml.safe_load(f)) template_specs.append(build_config) diff --git a/llama_stack/cli/stack/configure.py b/llama_stack/cli/stack/configure.py index 13899715b..021134e6d 100644 --- a/llama_stack/cli/stack/configure.py +++ b/llama_stack/cli/stack/configure.py @@ -152,7 +152,7 @@ class StackConfigure(Subcommand): config = StackRunConfig( built_at=datetime.now(), image_name=image_name, - apis=[], + apis=list(build_config.distribution_spec.providers.keys()), providers={}, models=[], shields=[], diff --git a/llama_stack/cli/stack/run.py b/llama_stack/cli/stack/run.py index 033b2a81f..dd4247e4b 100644 --- a/llama_stack/cli/stack/run.py +++ b/llama_stack/cli/stack/run.py @@ -7,7 +7,6 @@ import argparse from llama_stack.cli.subcommand import Subcommand -from llama_stack.distribution.datatypes import * # noqa: F403 class StackRun(Subcommand): @@ -49,8 +48,8 @@ class StackRun(Subcommand): from termcolor import cprint from llama_stack.distribution.build import ImageType + from llama_stack.distribution.configure import parse_and_maybe_upgrade_config from llama_stack.distribution.utils.config_dirs import BUILDS_BASE_DIR - from llama_stack.distribution.utils.exec import run_with_pty if not args.config: @@ -78,7 +77,8 @@ class StackRun(Subcommand): cprint(f"Using config `{config_file}`", "green") with open(config_file, "r") as f: - config = StackRunConfig(**yaml.safe_load(f)) + config_dict = yaml.safe_load(config_file.read_text()) + config = parse_and_maybe_upgrade_config(config_dict) if config.docker_image: script = pkg_resources.resource_filename( diff --git a/llama_stack/distribution/configure.py b/llama_stack/distribution/configure.py index 12f225af2..f533422fe 100644 --- a/llama_stack/distribution/configure.py +++ b/llama_stack/distribution/configure.py @@ -64,9 +64,6 @@ def configure_api_providers( ) -> StackRunConfig: is_nux = len(config.providers) == 0 - # keep this default so all APIs are served - config.apis = [] - if is_nux: print( textwrap.dedent( @@ -79,7 +76,12 @@ def configure_api_providers( provider_registry = get_provider_registry() builtin_apis = [a.routing_table_api for a in builtin_automatically_routed_apis()] - apis_to_serve = [a.value for a in Api if a not in (Api.telemetry, Api.inspect)] + + if config.apis: + apis_to_serve = config.apis + else: + apis_to_serve = [a.value for a in Api if a not in (Api.telemetry, Api.inspect)] + for api_str in apis_to_serve: api = Api(api_str) if api in builtin_apis: @@ -153,7 +155,7 @@ def configure_api_providers( "shields": (ShieldDef, configure_shields, "safety"), "memory_banks": (MemoryBankDef, configure_memory_banks, "memory"), } - safety_providers = config.providers["safety"] + safety_providers = config.providers.get("safety", []) for otype, (odef, 
config_method, api_str) in object_types.items(): existing_objects = getattr(config, otype) @@ -166,9 +168,15 @@ def configure_api_providers( ) updated_objects = existing_objects else: - # we are newly configuring this API - cprint(f"Configuring `{otype}`...", "blue", attrs=["bold"]) - updated_objects = config_method(config.providers[api_str], safety_providers) + providers = config.providers.get(api_str, []) + if not providers: + updated_objects = [] + else: + # we are newly configuring this API + cprint(f"Configuring `{otype}`...", "blue", attrs=["bold"]) + updated_objects = config_method( + config.providers[api_str], safety_providers + ) setattr(config, otype, updated_objects) print("") @@ -277,7 +285,7 @@ def upgrade_from_routing_table_to_registry( shields = [] memory_banks = [] - routing_table = config_dict["routing_table"] + routing_table = config_dict.get("routing_table", {}) for api_str, entries in routing_table.items(): providers = get_providers(entries) providers_by_api[api_str] = providers @@ -324,15 +332,13 @@ def upgrade_from_routing_table_to_registry( config_dict["shields"] = shields config_dict["memory_banks"] = memory_banks - if "api_providers" in config_dict: - for api_str, provider in config_dict["api_providers"].items(): - if api_str in ("inference", "safety", "memory"): - continue - + provider_map = config_dict.get("api_providers", config_dict.get("provider_map", {})) + if provider_map: + for api_str, provider in provider_map.items(): if isinstance(provider, dict): providers_by_api[api_str] = [ Provider( - provider_id=f"{provider['provider_type']}-00", + provider_id=f"{provider['provider_type']}", provider_type=provider["provider_type"], config=provider["config"], ) @@ -340,11 +346,12 @@ def upgrade_from_routing_table_to_registry( config_dict["providers"] = providers_by_api - del config_dict["routing_table"] - del config_dict["api_providers"] + config_dict.pop("routing_table", None) + config_dict.pop("api_providers", None) + config_dict.pop("provider_map", None) config_dict["apis"] = config_dict["apis_to_serve"] - del config_dict["apis_to_serve"] + config_dict.pop("apis_to_serve", None) return config_dict From 4fa467731e69ece2f96aeb9b1ca3228388279179 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 7 Oct 2024 14:35:50 -0700 Subject: [PATCH 48/69] Fix a bug in meta-reference inference when stream=False Also introduce a gross hack (to cover grosser(?) hack) to ensure non-stream requests don't send back responses in SSE format. Not sure which of these hacks is grosser. --- llama_stack/distribution/server/server.py | 21 +++++++++++++--- .../meta_reference/inference/inference.py | 24 +++++++++---------- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index dd3fafd0a..7b19f7996 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -58,13 +58,28 @@ def is_async_iterator_type(typ): ) -def create_sse_event(data: Any) -> str: +def create_sse_event(data: Any, **kwargs) -> str: if isinstance(data, BaseModel): data = data.json() else: data = json.dumps(data) - return f"data: {data}\n\n" + # !!FIX THIS ASAP!! grossest hack ever; not really SSE + # + # we use the return type of the function to determine if there's an AsyncGenerator + # and change the implementation to send SSE. unfortunately, chat_completion() takes a + # parameter called stream which _changes_ the return type. 
one correct way to fix this is: + # + # - have separate underlying functions for streaming and non-streaming because they need + # to operate differently anyhow + # - do a late binding of the return type based on the parameters passed in + if kwargs.get("stream", False): + return f"data: {data}\n\n" + else: + print( + f"!!FIX THIS ASAP!! Sending non-SSE event because client really is non-SSE: {data}" + ) + return data async def global_exception_handler(request: Request, exc: Exception): @@ -226,7 +241,7 @@ def create_dynamic_typed_route(func: Any, method: str): async def sse_generator(event_gen): try: async for item in event_gen: - yield create_sse_event(item) + yield create_sse_event(item, **kwargs) await asyncio.sleep(0.01) except asyncio.CancelledError: print("Generator cancelled") diff --git a/llama_stack/providers/impls/meta_reference/inference/inference.py b/llama_stack/providers/impls/meta_reference/inference/inference.py index f36d65c3f..a310a479a 100644 --- a/llama_stack/providers/impls/meta_reference/inference/inference.py +++ b/llama_stack/providers/impls/meta_reference/inference/inference.py @@ -6,7 +6,7 @@ import asyncio -from typing import AsyncIterator, List, Union +from typing import AsyncGenerator, List from llama_models.sku_list import resolve_model @@ -58,9 +58,7 @@ class MetaReferenceInferenceImpl(Inference): tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json, stream: Optional[bool] = False, logprobs: Optional[LogProbConfig] = None, - ) -> AsyncIterator[ - Union[ChatCompletionResponseStreamChunk, ChatCompletionResponse] - ]: + ) -> AsyncGenerator: # wrapper request to make it easier to pass around (internal only, not exposed to API) request = ChatCompletionRequest( model=model, @@ -117,15 +115,17 @@ class MetaReferenceInferenceImpl(Inference): if not ipython and buffer.startswith("<|python_tag|>"): ipython = True - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content="", - parse_status=ToolCallParseStatus.started, - ), + if request.stream: + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + content="", + parse_status=ToolCallParseStatus.started, + ), + ) ) - ) + buffer = buffer[len("<|python_tag|>") :] continue From 3ae2b712e84c31eb0da76bba172e58865fcbf36b Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 7 Oct 2024 15:46:16 -0700 Subject: [PATCH 49/69] Add inference test Run it as: ``` PROVIDER_ID=test-remote \ PROVIDER_CONFIG=$PWD/llama_stack/providers/tests/inference/provider_config_example.yaml \ pytest -s llama_stack/providers/tests/inference/test_inference.py \ --tb=auto \ --disable-warnings ``` --- llama_stack/apis/inference/client.py | 56 ++-- llama_stack/distribution/datatypes.py | 1 + .../adapters/inference/ollama/ollama.py | 45 ++- .../providers/adapters/inference/tgi/tgi.py | 5 +- llama_stack/providers/tests/__init__.py | 5 + .../providers/tests/inference/__init__.py | 5 + .../inference/provider_config_example.yaml | 15 + .../tests/inference/test_inference.py | 278 ++++++++++++++++++ 8 files changed, 356 insertions(+), 54 deletions(-) create mode 100644 llama_stack/providers/tests/__init__.py create mode 100644 llama_stack/providers/tests/inference/__init__.py create mode 100644 llama_stack/providers/tests/inference/provider_config_example.yaml create mode 100644 
llama_stack/providers/tests/inference/test_inference.py diff --git a/llama_stack/apis/inference/client.py b/llama_stack/apis/inference/client.py index fffcf4692..8b822058f 100644 --- a/llama_stack/apis/inference/client.py +++ b/llama_stack/apis/inference/client.py @@ -67,25 +67,26 @@ class InferenceClient(Inference): logprobs=logprobs, ) async with httpx.AsyncClient() as client: - async with client.stream( - "POST", - f"{self.base_url}/inference/chat_completion", - json=encodable_dict(request), - headers={"Content-Type": "application/json"}, - timeout=20, - ) as response: - if response.status_code != 200: - content = await response.aread() - cprint( - f"Error: HTTP {response.status_code} {content.decode()}", "red" - ) - return + if stream: + async with client.stream( + "POST", + f"{self.base_url}/inference/chat_completion", + json=encodable_dict(request), + headers={"Content-Type": "application/json"}, + timeout=20, + ) as response: + if response.status_code != 200: + content = await response.aread() + cprint( + f"Error: HTTP {response.status_code} {content.decode()}", + "red", + ) + return - async for line in response.aiter_lines(): - if line.startswith("data:"): - data = line[len("data: ") :] - try: - if request.stream: + async for line in response.aiter_lines(): + if line.startswith("data:"): + data = line[len("data: ") :] + try: if "error" in data: cprint(data, "red") continue @@ -93,11 +94,20 @@ class InferenceClient(Inference): yield ChatCompletionResponseStreamChunk( **json.loads(data) ) - else: - yield ChatCompletionResponse(**json.loads(data)) - except Exception as e: - print(data) - print(f"Error with parsing or validation: {e}") + except Exception as e: + print(data) + print(f"Error with parsing or validation: {e}") + else: + response = await client.post( + f"{self.base_url}/inference/chat_completion", + json=encodable_dict(request), + headers={"Content-Type": "application/json"}, + timeout=20, + ) + + response.raise_for_status() + j = response.json() + yield ChatCompletionResponse(**j) async def run_main( diff --git a/llama_stack/distribution/datatypes.py b/llama_stack/distribution/datatypes.py index c987d4c87..e09a6939c 100644 --- a/llama_stack/distribution/datatypes.py +++ b/llama_stack/distribution/datatypes.py @@ -109,6 +109,7 @@ this could be just a hash description="Reference to the conda environment if this package refers to a conda environment", ) apis: List[str] = Field( + default_factory=list, description=""" The list of APIs to serve. 
If not specified, all APIs specified in the provider_map will be served""", ) diff --git a/llama_stack/providers/adapters/inference/ollama/ollama.py b/llama_stack/providers/adapters/inference/ollama/ollama.py index 09af46b11..40a3f5977 100644 --- a/llama_stack/providers/adapters/inference/ollama/ollama.py +++ b/llama_stack/providers/adapters/inference/ollama/ollama.py @@ -36,8 +36,8 @@ class OllamaInferenceAdapter(ModelRegistryHelper, Inference): self, stack_to_provider_models_map=OLLAMA_SUPPORTED_MODELS ) self.url = url - tokenizer = Tokenizer.get_instance() - self.formatter = ChatFormat(tokenizer) + self.tokenizer = Tokenizer.get_instance() + self.formatter = ChatFormat(self.tokenizer) @property def client(self) -> AsyncClient: @@ -65,17 +65,6 @@ class OllamaInferenceAdapter(ModelRegistryHelper, Inference): ) -> AsyncGenerator: raise NotImplementedError() - def _messages_to_ollama_messages(self, messages: list[Message]) -> list: - ollama_messages = [] - for message in messages: - if message.role == "ipython": - role = "tool" - else: - role = message.role - ollama_messages.append({"role": role, "content": message.content}) - - return ollama_messages - def get_ollama_chat_options(self, request: ChatCompletionRequest) -> dict: options = {} if request.sampling_params is not None: @@ -113,6 +102,9 @@ class OllamaInferenceAdapter(ModelRegistryHelper, Inference): ) messages = augment_messages_for_tools(request) + model_input = self.formatter.encode_dialog_prompt(messages) + prompt = self.tokenizer.decode(model_input.tokens) + # accumulate sampling params and other options to pass to ollama options = self.get_ollama_chat_options(request) ollama_model = self.map_to_provider_model(request.model) @@ -131,13 +123,16 @@ class OllamaInferenceAdapter(ModelRegistryHelper, Inference): status["status"] == "success" ), f"Failed to pull model {self.model} in ollama" + common_params = { + "model": ollama_model, + "prompt": prompt, + "options": options, + "raw": True, + "stream": request.stream, + } + if not request.stream: - r = await self.client.chat( - model=ollama_model, - messages=self._messages_to_ollama_messages(messages), - stream=False, - options=options, - ) + r = await self.client.generate(**common_params) stop_reason = None if r["done"]: if r["done_reason"] == "stop": @@ -146,7 +141,7 @@ class OllamaInferenceAdapter(ModelRegistryHelper, Inference): stop_reason = StopReason.out_of_tokens completion_message = self.formatter.decode_assistant_message_from_content( - r["message"]["content"], stop_reason + r["response"], stop_reason ) yield ChatCompletionResponse( completion_message=completion_message, @@ -159,12 +154,7 @@ class OllamaInferenceAdapter(ModelRegistryHelper, Inference): delta="", ) ) - stream = await self.client.chat( - model=ollama_model, - messages=self._messages_to_ollama_messages(messages), - stream=True, - options=options, - ) + stream = await self.client.generate(**common_params) buffer = "" ipython = False @@ -178,8 +168,7 @@ class OllamaInferenceAdapter(ModelRegistryHelper, Inference): stop_reason = StopReason.out_of_tokens break - text = chunk["message"]["content"] - + text = chunk["response"] # check if its a tool call ( aka starts with <|python_tag|> ) if not ipython and text.startswith("<|python_tag|>"): ipython = True diff --git a/llama_stack/providers/adapters/inference/tgi/tgi.py b/llama_stack/providers/adapters/inference/tgi/tgi.py index 24b664068..0ad20edd6 100644 --- a/llama_stack/providers/adapters/inference/tgi/tgi.py +++ 
b/llama_stack/providers/adapters/inference/tgi/tgi.py @@ -100,8 +100,6 @@ class _HfAdapter(Inference): self.max_tokens - input_tokens - 1, ) - print(f"Calculated max_new_tokens: {max_new_tokens}") - options = self.get_chat_options(request) if not request.stream: response = await self.client.text_generation( @@ -119,8 +117,9 @@ class _HfAdapter(Inference): elif response.details.finish_reason == "length": stop_reason = StopReason.out_of_tokens + generated_text = "".join(t.text for t in response.details.tokens) completion_message = self.formatter.decode_assistant_message_from_content( - response.generated_text, + generated_text, stop_reason, ) yield ChatCompletionResponse( diff --git a/llama_stack/providers/tests/__init__.py b/llama_stack/providers/tests/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/llama_stack/providers/tests/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/llama_stack/providers/tests/inference/__init__.py b/llama_stack/providers/tests/inference/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/llama_stack/providers/tests/inference/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/llama_stack/providers/tests/inference/provider_config_example.yaml b/llama_stack/providers/tests/inference/provider_config_example.yaml new file mode 100644 index 000000000..014ce84d4 --- /dev/null +++ b/llama_stack/providers/tests/inference/provider_config_example.yaml @@ -0,0 +1,15 @@ +providers: + - provider_id: test-ollama + provider_type: remote::ollama + config: + host: localhost + port: 11434 + - provider_id: test-tgi + provider_type: remote::tgi + config: + url: http://localhost:7001 + - provider_id: test-remote + provider_type: remote + config: + host: localhost + port: 7002 diff --git a/llama_stack/providers/tests/inference/test_inference.py b/llama_stack/providers/tests/inference/test_inference.py new file mode 100644 index 000000000..61989b691 --- /dev/null +++ b/llama_stack/providers/tests/inference/test_inference.py @@ -0,0 +1,278 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
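+#
+# Run this test, for example, as:
+#   PROVIDER_ID=test-remote \
+#   PROVIDER_CONFIG=$PWD/llama_stack/providers/tests/inference/provider_config_example.yaml \
+#   pytest -s llama_stack/providers/tests/inference/test_inference.py \
+#   --tb=auto --disable-warnings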
+ +import itertools +import os +from datetime import datetime + +import pytest +import pytest_asyncio +import yaml + +from llama_models.llama3.api.datatypes import * # noqa: F403 +from llama_stack.apis.inference import * # noqa: F403 + +from llama_stack.distribution.datatypes import * # noqa: F403 +from llama_stack.distribution.configure import parse_and_maybe_upgrade_config +from llama_stack.distribution.resolver import resolve_impls_with_routing + + +def group_chunks(response): + return { + event_type: list(group) + for event_type, group in itertools.groupby( + response, key=lambda chunk: chunk.event.event_type + ) + } + + +Llama_8B = "Llama3.1-8B-Instruct" +Llama_3B = "Llama3.2-3B-Instruct" + + +def get_expected_stop_reason(model: str): + return StopReason.end_of_message if "Llama3.1" in model else StopReason.end_of_turn + + +async def stack_impls(model): + if "PROVIDER_CONFIG" not in os.environ: + raise ValueError( + "You must set PROVIDER_CONFIG to a YAML file containing provider config" + ) + + with open(os.environ["PROVIDER_CONFIG"], "r") as f: + config_dict = yaml.safe_load(f) + + if "providers" not in config_dict: + raise ValueError("Config file should contain a `providers` key") + + providers_by_id = {x["provider_id"]: x for x in config_dict["providers"]} + if len(providers_by_id) == 0: + raise ValueError("No providers found in config file") + + if "PROVIDER_ID" in os.environ: + provider_id = os.environ["PROVIDER_ID"] + if provider_id not in providers_by_id: + raise ValueError(f"Provider ID {provider_id} not found in config file") + provider = providers_by_id[provider_id] + else: + provider = list(providers_by_id.values())[0] + print(f"No provider ID specified, picking first {provider['provider_id']}") + + config_dict = dict( + built_at=datetime.now(), + image_name="test-fixture", + apis=[ + Api.inference, + Api.models, + ], + providers=dict( + inference=[ + Provider(**provider), + ] + ), + models=[ + ModelDef( + identifier=model, + llama_model=model, + provider_id=provider["provider_id"], + ) + ], + shields=[], + memory_banks=[], + ) + run_config = parse_and_maybe_upgrade_config(config_dict) + impls = await resolve_impls_with_routing(run_config) + return impls + + +# This is going to create multiple Stack impls without tearing down the previous one +# Fix that! +@pytest_asyncio.fixture( + scope="session", + params=[ + {"model": Llama_8B}, + {"model": Llama_3B}, + ], +) +async def inference_settings(request): + model = request.param["model"] + impls = await stack_impls(model) + return { + "impl": impls[Api.inference], + "common_params": { + "model": model, + "tool_choice": ToolChoice.auto, + "tool_prompt_format": ( + ToolPromptFormat.json + if "Llama3.1" in model + else ToolPromptFormat.python_list + ), + }, + } + + +@pytest.fixture +def sample_messages(): + return [ + SystemMessage(content="You are a helpful assistant."), + UserMessage(content="What's the weather like today?"), + ] + + +@pytest.fixture +def sample_tool_definition(): + return ToolDefinition( + tool_name="get_weather", + description="Get the current weather", + parameters={ + "location": ToolParamDefinition( + param_type="string", + description="The city and state, e.g. 
San Francisco, CA", + ), + }, + ) + + +@pytest.mark.asyncio +async def test_chat_completion_non_streaming(inference_settings, sample_messages): + inference_impl = inference_settings["impl"] + response = [ + r + async for r in inference_impl.chat_completion( + messages=sample_messages, + stream=False, + **inference_settings["common_params"], + ) + ] + + assert len(response) == 1 + assert isinstance(response[0], ChatCompletionResponse) + assert response[0].completion_message.role == "assistant" + assert isinstance(response[0].completion_message.content, str) + assert len(response[0].completion_message.content) > 0 + + +@pytest.mark.asyncio +async def test_chat_completion_streaming(inference_settings, sample_messages): + inference_impl = inference_settings["impl"] + response = [ + r + async for r in inference_impl.chat_completion( + messages=sample_messages, + stream=True, + **inference_settings["common_params"], + ) + ] + + assert len(response) > 0 + assert all( + isinstance(chunk, ChatCompletionResponseStreamChunk) for chunk in response + ) + grouped = group_chunks(response) + assert len(grouped[ChatCompletionResponseEventType.start]) == 1 + assert len(grouped[ChatCompletionResponseEventType.progress]) > 0 + assert len(grouped[ChatCompletionResponseEventType.complete]) == 1 + + end = grouped[ChatCompletionResponseEventType.complete][0] + assert end.event.stop_reason == StopReason.end_of_turn + + +@pytest.mark.asyncio +async def test_chat_completion_with_tool_calling( + inference_settings, + sample_messages, + sample_tool_definition, +): + inference_impl = inference_settings["impl"] + messages = sample_messages + [ + UserMessage( + content="What's the weather like in San Francisco?", + ) + ] + + response = [ + r + async for r in inference_impl.chat_completion( + messages=messages, + tools=[sample_tool_definition], + stream=False, + **inference_settings["common_params"], + ) + ] + + assert len(response) == 1 + assert isinstance(response[0], ChatCompletionResponse) + + message = response[0].completion_message + + stop_reason = get_expected_stop_reason(inference_settings["common_params"]["model"]) + assert message.stop_reason == stop_reason + assert message.tool_calls is not None + assert len(message.tool_calls) > 0 + + call = message.tool_calls[0] + assert call.tool_name == "get_weather" + assert "location" in call.arguments + assert "San Francisco" in call.arguments["location"] + + +@pytest.mark.asyncio +async def test_chat_completion_with_tool_calling_streaming( + inference_settings, + sample_messages, + sample_tool_definition, +): + inference_impl = inference_settings["impl"] + messages = sample_messages + [ + UserMessage( + content="What's the weather like in San Francisco?", + ) + ] + + response = [ + r + async for r in inference_impl.chat_completion( + messages=messages, + tools=[sample_tool_definition], + stream=True, + **inference_settings["common_params"], + ) + ] + + assert len(response) > 0 + assert all( + isinstance(chunk, ChatCompletionResponseStreamChunk) for chunk in response + ) + grouped = group_chunks(response) + assert len(grouped[ChatCompletionResponseEventType.start]) == 1 + assert len(grouped[ChatCompletionResponseEventType.progress]) > 0 + assert len(grouped[ChatCompletionResponseEventType.complete]) == 1 + + end = grouped[ChatCompletionResponseEventType.complete][0] + expected_stop_reason = get_expected_stop_reason( + inference_settings["common_params"]["model"] + ) + assert end.event.stop_reason == expected_stop_reason + + model = 
inference_settings["common_params"]["model"] + if "Llama3.1" in model: + assert all( + isinstance(chunk.event.delta, ToolCallDelta) + for chunk in grouped[ChatCompletionResponseEventType.progress] + ) + first = grouped[ChatCompletionResponseEventType.progress][0] + assert first.event.delta.parse_status == ToolCallParseStatus.started + + last = grouped[ChatCompletionResponseEventType.progress][-1] + assert last.event.stop_reason == expected_stop_reason + assert last.event.delta.parse_status == ToolCallParseStatus.success + assert isinstance(last.event.delta.content, ToolCall) + + call = last.event.delta.content + assert call.tool_name == "get_weather" + assert "location" in call.arguments + assert "San Francisco" in call.arguments["location"] From bbd3a026159e95135409099c630441ca18b71f0f Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 7 Oct 2024 17:28:19 -0700 Subject: [PATCH 50/69] Make Together inference work using the raw completions API --- .../adapters/inference/together/together.py | 32 ++++++++++--------- .../tests/inference/test_inference.py | 19 +++++++++-- 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/llama_stack/providers/adapters/inference/together/together.py b/llama_stack/providers/adapters/inference/together/together.py index 2ee90d8e3..73e0edc4e 100644 --- a/llama_stack/providers/adapters/inference/together/together.py +++ b/llama_stack/providers/adapters/inference/together/together.py @@ -41,8 +41,8 @@ class TogetherInferenceAdapter( self, stack_to_provider_models_map=TOGETHER_SUPPORTED_MODELS ) self.config = config - tokenizer = Tokenizer.get_instance() - self.formatter = ChatFormat(tokenizer) + self.tokenizer = Tokenizer.get_instance() + self.formatter = ChatFormat(self.tokenizer) @property def client(self) -> Together: @@ -124,27 +124,28 @@ class TogetherInferenceAdapter( options = self.get_together_chat_options(request) together_model = self.map_to_provider_model(request.model) messages = augment_messages_for_tools(request) + model_input = self.formatter.encode_dialog_prompt(messages) + prompt = self.tokenizer.decode(model_input.tokens) if not request.stream: # TODO: might need to add back an async here - r = client.chat.completions.create( + r = client.completions.create( model=together_model, - messages=self._messages_to_together_messages(messages), + prompt=prompt, stream=False, **options, ) stop_reason = None - if r.choices[0].finish_reason: - if ( - r.choices[0].finish_reason == "stop" - or r.choices[0].finish_reason == "eos" - ): + choice = r.choices[0] + if choice.finish_reason: + if choice.finish_reason in ["stop", "eos"]: stop_reason = StopReason.end_of_turn - elif r.choices[0].finish_reason == "length": + stop_reason = StopReason.end_of_turn + elif choice.finish_reason == "length": stop_reason = StopReason.out_of_tokens completion_message = self.formatter.decode_assistant_message_from_content( - r.choices[0].message.content, stop_reason + choice.text, stop_reason ) yield ChatCompletionResponse( completion_message=completion_message, @@ -162,20 +163,21 @@ class TogetherInferenceAdapter( ipython = False stop_reason = None - for chunk in client.chat.completions.create( + for chunk in client.completions.create( model=together_model, - messages=self._messages_to_together_messages(messages), + prompt=prompt, stream=True, **options, ): - if finish_reason := chunk.choices[0].finish_reason: + choice = chunk.choices[0] + if finish_reason := choice.finish_reason: if stop_reason is None and finish_reason in ["stop", "eos"]: stop_reason = 
StopReason.end_of_turn elif stop_reason is None and finish_reason == "length": stop_reason = StopReason.out_of_tokens break - text = chunk.choices[0].delta.content + text = choice.delta.content if text is None: continue diff --git a/llama_stack/providers/tests/inference/test_inference.py b/llama_stack/providers/tests/inference/test_inference.py index 61989b691..794cbaa2b 100644 --- a/llama_stack/providers/tests/inference/test_inference.py +++ b/llama_stack/providers/tests/inference/test_inference.py @@ -5,6 +5,7 @@ # the root directory of this source tree. import itertools +import json import os from datetime import datetime @@ -17,6 +18,7 @@ from llama_stack.apis.inference import * # noqa: F403 from llama_stack.distribution.datatypes import * # noqa: F403 from llama_stack.distribution.configure import parse_and_maybe_upgrade_config +from llama_stack.distribution.request_headers import set_request_provider_data from llama_stack.distribution.resolver import resolve_impls_with_routing @@ -60,9 +62,10 @@ async def stack_impls(model): provider = providers_by_id[provider_id] else: provider = list(providers_by_id.values())[0] - print(f"No provider ID specified, picking first {provider['provider_id']}") + provider_id = provider["provider_id"] + print(f"No provider ID specified, picking first `{provider_id}`") - config_dict = dict( + run_config = dict( built_at=datetime.now(), image_name="test-fixture", apis=[ @@ -84,8 +87,17 @@ async def stack_impls(model): shields=[], memory_banks=[], ) - run_config = parse_and_maybe_upgrade_config(config_dict) + run_config = parse_and_maybe_upgrade_config(run_config) impls = await resolve_impls_with_routing(run_config) + + # may need something cleaner here + if "provider_data" in config_dict: + provider_data = config_dict["provider_data"].get(provider_id, {}) + if provider_data: + set_request_provider_data( + {"X-LlamaStack-ProviderData": json.dumps(provider_data)} + ) + return impls @@ -97,6 +109,7 @@ async def stack_impls(model): {"model": Llama_8B}, {"model": Llama_3B}, ], + ids=lambda d: d["model"], ) async def inference_settings(request): model = request.param["model"] From dba7caf1d0a38a4af71130d66386c5da1cafc093 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 7 Oct 2024 17:43:47 -0700 Subject: [PATCH 51/69] Fix fireworks and update the test Don't look for eom_id / eot_id sadly since providers don't return the last token --- .../adapters/inference/fireworks/fireworks.py | 36 +++++++++---------- .../adapters/inference/together/together.py | 11 ------ .../inference/provider_config_example.yaml | 10 ++++++ .../tests/inference/test_inference.py | 17 +++++---- 4 files changed, 37 insertions(+), 37 deletions(-) diff --git a/llama_stack/providers/adapters/inference/fireworks/fireworks.py b/llama_stack/providers/adapters/inference/fireworks/fireworks.py index 061e281be..654cd345c 100644 --- a/llama_stack/providers/adapters/inference/fireworks/fireworks.py +++ b/llama_stack/providers/adapters/inference/fireworks/fireworks.py @@ -27,6 +27,8 @@ FIREWORKS_SUPPORTED_MODELS = { "Llama3.1-8B-Instruct": "fireworks/llama-v3p1-8b-instruct", "Llama3.1-70B-Instruct": "fireworks/llama-v3p1-70b-instruct", "Llama3.1-405B-Instruct": "fireworks/llama-v3p1-405b-instruct", + "Llama3.2-1B-Instruct": "fireworks/llama-v3p2-1b-instruct", + "Llama3.2-3B-Instruct": "fireworks/llama-v3p2-3b-instruct", } @@ -36,8 +38,8 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference): self, stack_to_provider_models_map=FIREWORKS_SUPPORTED_MODELS ) self.config = config - 
tokenizer = Tokenizer.get_instance() - self.formatter = ChatFormat(tokenizer) + self.tokenizer = Tokenizer.get_instance() + self.formatter = ChatFormat(self.tokenizer) @property def client(self) -> Fireworks: @@ -59,17 +61,6 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference): ) -> AsyncGenerator: raise NotImplementedError() - def _messages_to_fireworks_messages(self, messages: list[Message]) -> list: - fireworks_messages = [] - for message in messages: - if message.role == "ipython": - role = "tool" - else: - role = message.role - fireworks_messages.append({"role": role, "content": message.content}) - - return fireworks_messages - def get_fireworks_chat_options(self, request: ChatCompletionRequest) -> dict: options = {} if request.sampling_params is not None: @@ -102,15 +93,22 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference): ) messages = augment_messages_for_tools(request) + model_input = self.formatter.encode_dialog_prompt(messages) + prompt = self.tokenizer.decode(model_input.tokens) + # Fireworks always prepends with BOS + if prompt.startswith("<|begin_of_text|>"): + prompt = prompt[len("<|begin_of_text|>") :] # accumulate sampling params and other options to pass to fireworks options = self.get_fireworks_chat_options(request) + options.setdefault("max_tokens", 512) + fireworks_model = self.map_to_provider_model(request.model) if not request.stream: - r = await self.client.chat.completions.acreate( + r = await self.client.completion.acreate( model=fireworks_model, - messages=self._messages_to_fireworks_messages(messages), + prompt=prompt, stream=False, **options, ) @@ -122,7 +120,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference): stop_reason = StopReason.out_of_tokens completion_message = self.formatter.decode_assistant_message_from_content( - r.choices[0].message.content, stop_reason + r.choices[0].text, stop_reason ) yield ChatCompletionResponse( @@ -141,9 +139,9 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference): ipython = False stop_reason = None - async for chunk in self.client.chat.completions.acreate( + async for chunk in self.client.completion.acreate( model=fireworks_model, - messages=self._messages_to_fireworks_messages(messages), + prompt=prompt, stream=True, **options, ): @@ -157,7 +155,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference): stop_reason = StopReason.out_of_tokens break - text = chunk.choices[0].delta.content + text = chunk.choices[0].text if text is None: continue diff --git a/llama_stack/providers/adapters/inference/together/together.py b/llama_stack/providers/adapters/inference/together/together.py index 73e0edc4e..5326d83d4 100644 --- a/llama_stack/providers/adapters/inference/together/together.py +++ b/llama_stack/providers/adapters/inference/together/together.py @@ -64,17 +64,6 @@ class TogetherInferenceAdapter( ) -> AsyncGenerator: raise NotImplementedError() - def _messages_to_together_messages(self, messages: list[Message]) -> list: - together_messages = [] - for message in messages: - if message.role == "ipython": - role = "tool" - else: - role = message.role - together_messages.append({"role": role, "content": message.content}) - - return together_messages - def get_together_chat_options(self, request: ChatCompletionRequest) -> dict: options = {} if request.sampling_params is not None: diff --git a/llama_stack/providers/tests/inference/provider_config_example.yaml b/llama_stack/providers/tests/inference/provider_config_example.yaml index 014ce84d4..8431b01ac 100644 
--- a/llama_stack/providers/tests/inference/provider_config_example.yaml +++ b/llama_stack/providers/tests/inference/provider_config_example.yaml @@ -13,3 +13,13 @@ providers: config: host: localhost port: 7002 + - provider_id: test-together + provider_type: remote::together + config: {} +# if a provider needs private keys from the client, they use the +# "get_request_provider_data" function (see distribution/request_headers.py) +# this is a place to provide such data. +provider_data: + "test-together": + together_api_key: + 0xdeadbeefputrealapikeyhere diff --git a/llama_stack/providers/tests/inference/test_inference.py b/llama_stack/providers/tests/inference/test_inference.py index 794cbaa2b..094ee5924 100644 --- a/llama_stack/providers/tests/inference/test_inference.py +++ b/llama_stack/providers/tests/inference/test_inference.py @@ -222,8 +222,9 @@ async def test_chat_completion_with_tool_calling( message = response[0].completion_message - stop_reason = get_expected_stop_reason(inference_settings["common_params"]["model"]) - assert message.stop_reason == stop_reason + # This is not supported in most providers :/ they don't return eom_id / eot_id + # stop_reason = get_expected_stop_reason(inference_settings["common_params"]["model"]) + # assert message.stop_reason == stop_reason assert message.tool_calls is not None assert len(message.tool_calls) > 0 @@ -266,10 +267,12 @@ async def test_chat_completion_with_tool_calling_streaming( assert len(grouped[ChatCompletionResponseEventType.complete]) == 1 end = grouped[ChatCompletionResponseEventType.complete][0] - expected_stop_reason = get_expected_stop_reason( - inference_settings["common_params"]["model"] - ) - assert end.event.stop_reason == expected_stop_reason + + # This is not supported in most providers :/ they don't return eom_id / eot_id + # expected_stop_reason = get_expected_stop_reason( + # inference_settings["common_params"]["model"] + # ) + # assert end.event.stop_reason == expected_stop_reason model = inference_settings["common_params"]["model"] if "Llama3.1" in model: @@ -281,7 +284,7 @@ async def test_chat_completion_with_tool_calling_streaming( assert first.event.delta.parse_status == ToolCallParseStatus.started last = grouped[ChatCompletionResponseEventType.progress][-1] - assert last.event.stop_reason == expected_stop_reason + # assert last.event.stop_reason == expected_stop_reason assert last.event.delta.parse_status == ToolCallParseStatus.success assert isinstance(last.event.delta.content, ToolCall) From 4ab6e1b81aae3538c08d3014369ce7f8c71b01d9 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 7 Oct 2024 22:34:53 -0700 Subject: [PATCH 52/69] Add really basic testing for memory API weaviate does not work; the cluster URL seems malformed --- llama_stack/distribution/resolver.py | 7 +- .../distribution/routers/routing_tables.py | 6 +- .../adapters/memory/weaviate/__init__.py | 11 +- llama_stack/providers/datatypes.py | 2 - llama_stack/providers/registry/memory.py | 1 + .../tests/inference/test_inference.py | 85 +++------------ .../providers/tests/memory/__init__.py | 5 + .../tests/memory/provider_config_example.yaml | 24 +++++ .../providers/tests/memory/test_memory.py | 60 +++++++++++ llama_stack/providers/tests/resolver.py | 100 ++++++++++++++++++ 10 files changed, 220 insertions(+), 81 deletions(-) create mode 100644 llama_stack/providers/tests/memory/__init__.py create mode 100644 llama_stack/providers/tests/memory/provider_config_example.yaml create mode 100644 llama_stack/providers/tests/memory/test_memory.py 
create mode 100644 llama_stack/providers/tests/resolver.py diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index 2d3679177..4db72d29e 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -147,10 +147,9 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An inner_impls = {} if isinstance(provider.spec, RoutingTableProviderSpec): - for entry in provider.spec.registry: - inner_impls[entry.provider_id] = inner_impls_by_provider_id[ - f"inner-{provider.spec.router_api.value}" - ][entry.provider_id] + inner_impls = inner_impls_by_provider_id[ + f"inner-{provider.spec.router_api.value}" + ] impl = await instantiate_provider( provider, diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index 3d89aa19f..73e26dd2e 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -70,8 +70,12 @@ class CommonRoutingTableImpl(RoutingTable): def get_provider_impl(self, routing_key: str) -> Any: if routing_key not in self.routing_key_to_object: - raise ValueError(f"Could not find provider for {routing_key}") + raise ValueError(f"Object `{routing_key}` not registered") + obj = self.routing_key_to_object[routing_key] + if obj.provider_id not in self.impls_by_provider_id: + raise ValueError(f"Provider `{obj.provider_id}` not found") + return self.impls_by_provider_id[obj.provider_id] def get_object_by_identifier(self, identifier: str) -> Optional[RoutableObject]: diff --git a/llama_stack/providers/adapters/memory/weaviate/__init__.py b/llama_stack/providers/adapters/memory/weaviate/__init__.py index b564eabf4..504bd1508 100644 --- a/llama_stack/providers/adapters/memory/weaviate/__init__.py +++ b/llama_stack/providers/adapters/memory/weaviate/__init__.py @@ -1,8 +1,15 @@ -from .config import WeaviateConfig +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .config import WeaviateConfig, WeaviateRequestProviderData # noqa: F401 + async def get_adapter_impl(config: WeaviateConfig, _deps): from .weaviate import WeaviateMemoryAdapter impl = WeaviateMemoryAdapter(config) await impl.initialize() - return impl \ No newline at end of file + return impl diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py index a254e2808..0c8f6ad21 100644 --- a/llama_stack/providers/datatypes.py +++ b/llama_stack/providers/datatypes.py @@ -43,8 +43,6 @@ class ProviderSpec(BaseModel): class RoutingTable(Protocol): - def get_routing_keys(self) -> List[str]: ... - def get_provider_impl(self, routing_key: str) -> Any: ... 
diff --git a/llama_stack/providers/registry/memory.py b/llama_stack/providers/registry/memory.py index a3f0bdb6f..a8d776c3f 100644 --- a/llama_stack/providers/registry/memory.py +++ b/llama_stack/providers/registry/memory.py @@ -62,6 +62,7 @@ def available_providers() -> List[ProviderSpec]: adapter_type="weaviate", pip_packages=EMBEDDING_DEPS + ["weaviate-client"], module="llama_stack.providers.adapters.memory.weaviate", + config_class="llama_stack.providers.adapters.memory.weaviate.WeaviateConfig", provider_data_validator="llama_stack.providers.adapters.memory.weaviate.WeaviateRequestProviderData", ), ), diff --git a/llama_stack/providers/tests/inference/test_inference.py b/llama_stack/providers/tests/inference/test_inference.py index 094ee5924..de8241b20 100644 --- a/llama_stack/providers/tests/inference/test_inference.py +++ b/llama_stack/providers/tests/inference/test_inference.py @@ -5,21 +5,15 @@ # the root directory of this source tree. import itertools -import json -import os -from datetime import datetime import pytest import pytest_asyncio -import yaml from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_stack.apis.inference import * # noqa: F403 from llama_stack.distribution.datatypes import * # noqa: F403 -from llama_stack.distribution.configure import parse_and_maybe_upgrade_config -from llama_stack.distribution.request_headers import set_request_provider_data -from llama_stack.distribution.resolver import resolve_impls_with_routing +from llama_stack.providers.tests.resolver import resolve_impls_for_test def group_chunks(response): @@ -39,68 +33,6 @@ def get_expected_stop_reason(model: str): return StopReason.end_of_message if "Llama3.1" in model else StopReason.end_of_turn -async def stack_impls(model): - if "PROVIDER_CONFIG" not in os.environ: - raise ValueError( - "You must set PROVIDER_CONFIG to a YAML file containing provider config" - ) - - with open(os.environ["PROVIDER_CONFIG"], "r") as f: - config_dict = yaml.safe_load(f) - - if "providers" not in config_dict: - raise ValueError("Config file should contain a `providers` key") - - providers_by_id = {x["provider_id"]: x for x in config_dict["providers"]} - if len(providers_by_id) == 0: - raise ValueError("No providers found in config file") - - if "PROVIDER_ID" in os.environ: - provider_id = os.environ["PROVIDER_ID"] - if provider_id not in providers_by_id: - raise ValueError(f"Provider ID {provider_id} not found in config file") - provider = providers_by_id[provider_id] - else: - provider = list(providers_by_id.values())[0] - provider_id = provider["provider_id"] - print(f"No provider ID specified, picking first `{provider_id}`") - - run_config = dict( - built_at=datetime.now(), - image_name="test-fixture", - apis=[ - Api.inference, - Api.models, - ], - providers=dict( - inference=[ - Provider(**provider), - ] - ), - models=[ - ModelDef( - identifier=model, - llama_model=model, - provider_id=provider["provider_id"], - ) - ], - shields=[], - memory_banks=[], - ) - run_config = parse_and_maybe_upgrade_config(run_config) - impls = await resolve_impls_with_routing(run_config) - - # may need something cleaner here - if "provider_data" in config_dict: - provider_data = config_dict["provider_data"].get(provider_id, {}) - if provider_data: - set_request_provider_data( - {"X-LlamaStack-ProviderData": json.dumps(provider_data)} - ) - - return impls - - # This is going to create multiple Stack impls without tearing down the previous one # Fix that! 
@pytest_asyncio.fixture( @@ -113,7 +45,17 @@ async def stack_impls(model): ) async def inference_settings(request): model = request.param["model"] - impls = await stack_impls(model) + impls = await resolve_impls_for_test( + Api.inference, + models=[ + ModelDef( + identifier=model, + llama_model=model, + provider_id="", + ) + ], + ) + return { "impl": impls[Api.inference], "common_params": { @@ -266,12 +208,11 @@ async def test_chat_completion_with_tool_calling_streaming( assert len(grouped[ChatCompletionResponseEventType.progress]) > 0 assert len(grouped[ChatCompletionResponseEventType.complete]) == 1 - end = grouped[ChatCompletionResponseEventType.complete][0] - # This is not supported in most providers :/ they don't return eom_id / eot_id # expected_stop_reason = get_expected_stop_reason( # inference_settings["common_params"]["model"] # ) + # end = grouped[ChatCompletionResponseEventType.complete][0] # assert end.event.stop_reason == expected_stop_reason model = inference_settings["common_params"]["model"] diff --git a/llama_stack/providers/tests/memory/__init__.py b/llama_stack/providers/tests/memory/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/llama_stack/providers/tests/memory/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. diff --git a/llama_stack/providers/tests/memory/provider_config_example.yaml b/llama_stack/providers/tests/memory/provider_config_example.yaml new file mode 100644 index 000000000..cac1adde5 --- /dev/null +++ b/llama_stack/providers/tests/memory/provider_config_example.yaml @@ -0,0 +1,24 @@ +providers: + - provider_id: test-faiss + provider_type: meta-reference + config: {} + - provider_id: test-chroma + provider_type: remote::chroma + config: + host: localhost + port: 6001 + - provider_id: test-remote + provider_type: remote + config: + host: localhost + port: 7002 + - provider_id: test-weaviate + provider_type: remote::weaviate + config: {} +# if a provider needs private keys from the client, they use the +# "get_request_provider_data" function (see distribution/request_headers.py) +# this is a place to provide such data. +provider_data: + "test-weaviate": + weaviate_api_key: 0xdeadbeefputrealapikeyhere + weaviate_cluster_url: http://foobarbaz diff --git a/llama_stack/providers/tests/memory/test_memory.py b/llama_stack/providers/tests/memory/test_memory.py new file mode 100644 index 000000000..4f6dadb14 --- /dev/null +++ b/llama_stack/providers/tests/memory/test_memory.py @@ -0,0 +1,60 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
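+#
+# These tests mirror the inference tests; run them, for example, as:
+#   PROVIDER_ID=test-faiss \
+#   PROVIDER_CONFIG=$PWD/llama_stack/providers/tests/memory/provider_config_example.yaml \
+#   pytest -s llama_stack/providers/tests/memory/test_memory.py \
+#   --tb=auto --disable-warnings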
+ +import pytest +import pytest_asyncio + +from llama_stack.apis.memory import * # noqa: F403 +from llama_stack.distribution.datatypes import * # noqa: F403 +from llama_stack.providers.tests.resolver import resolve_impls_for_test + + +@pytest_asyncio.fixture(scope="session") +async def memory_impl(): + impls = await resolve_impls_for_test( + Api.memory, + memory_banks=[], + ) + return impls[Api.memory] + + +@pytest.fixture +def sample_document(): + return MemoryBankDocument( + document_id="doc1", + content="This is a sample document for testing.", + mime_type="text/plain", + metadata={"author": "Test Author"}, + ) + + +async def register_memory_bank(memory_impl: Memory): + bank = VectorMemoryBankDef( + identifier="test_bank", + provider_id="", + embedding_model="all-MiniLM-L6-v2", + chunk_size_in_tokens=512, + overlap_size_in_tokens=64, + ) + + await memory_impl.register_memory_bank(bank) + + +@pytest.mark.asyncio +async def test_query_documents(memory_impl, sample_document): + with pytest.raises(ValueError): + await memory_impl.insert_documents("test_bank", [sample_document]) + + await register_memory_bank(memory_impl) + await memory_impl.insert_documents("test_bank", [sample_document]) + + query = ["sample ", "document"] + response = await memory_impl.query_documents("test_bank", query) + + assert isinstance(response, QueryDocumentsResponse) + assert len(response.chunks) > 0 + assert len(response.scores) > 0 + assert len(response.chunks) == len(response.scores) diff --git a/llama_stack/providers/tests/resolver.py b/llama_stack/providers/tests/resolver.py new file mode 100644 index 000000000..266f252e4 --- /dev/null +++ b/llama_stack/providers/tests/resolver.py @@ -0,0 +1,100 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
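+#
+# Shared test helper: reads PROVIDER_CONFIG (and optionally PROVIDER_ID) from the
+# environment, builds a single-provider StackRunConfig for the requested API,
+# resolves the implementations, and sets the X-LlamaStack-ProviderData request
+# header from any `provider_data` section in the config file.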
+ +import json +import os +from datetime import datetime + +import yaml + +from llama_stack.distribution.datatypes import * # noqa: F403 +from llama_stack.distribution.configure import parse_and_maybe_upgrade_config +from llama_stack.distribution.request_headers import set_request_provider_data +from llama_stack.distribution.resolver import resolve_impls_with_routing + + +async def resolve_impls_for_test( + api: Api, + models: List[ModelDef] = None, + memory_banks: List[MemoryBankDef] = None, + shields: List[ShieldDef] = None, +): + if "PROVIDER_CONFIG" not in os.environ: + raise ValueError( + "You must set PROVIDER_CONFIG to a YAML file containing provider config" + ) + + with open(os.environ["PROVIDER_CONFIG"], "r") as f: + config_dict = yaml.safe_load(f) + + if "providers" not in config_dict: + raise ValueError("Config file should contain a `providers` key") + + providers_by_id = {x["provider_id"]: x for x in config_dict["providers"]} + if len(providers_by_id) == 0: + raise ValueError("No providers found in config file") + + if "PROVIDER_ID" in os.environ: + provider_id = os.environ["PROVIDER_ID"] + if provider_id not in providers_by_id: + raise ValueError(f"Provider ID {provider_id} not found in config file") + provider = providers_by_id[provider_id] + else: + provider = list(providers_by_id.values())[0] + provider_id = provider["provider_id"] + print(f"No provider ID specified, picking first `{provider_id}`") + + models = models or [] + shields = shields or [] + memory_banks = memory_banks or [] + + models = [ + ModelDef( + **{ + **m.dict(), + "provider_id": provider_id, + } + ) + for m in models + ] + shields = [ + ShieldDef( + **{ + **s.dict(), + "provider_id": provider_id, + } + ) + for s in shields + ] + memory_banks = [ + MemoryBankDef( + **{ + **m.dict(), + "provider_id": provider_id, + } + ) + for m in memory_banks + ] + run_config = dict( + built_at=datetime.now(), + image_name="test-fixture", + apis=[api], + providers={api.value: [Provider(**provider)]}, + models=models, + memory_banks=memory_banks, + shields=shields, + ) + run_config = parse_and_maybe_upgrade_config(run_config) + impls = await resolve_impls_with_routing(run_config) + + if "provider_data" in config_dict: + provider_data = config_dict["provider_data"].get(provider_id, {}) + if provider_data: + set_request_provider_data( + {"X-LlamaStack-ProviderData": json.dumps(provider_data)} + ) + + return impls From f21ad1173e0cb9332c84a4c9ba2ad3b9da4872a5 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 7 Oct 2024 23:09:31 -0700 Subject: [PATCH 53/69] improve memory test, but it fails on chromadb :/ --- .../adapters/memory/chroma/chroma.py | 2 +- .../providers/tests/memory/test_memory.py | 67 +++++++++++++++---- 2 files changed, 56 insertions(+), 13 deletions(-) diff --git a/llama_stack/providers/adapters/memory/chroma/chroma.py b/llama_stack/providers/adapters/memory/chroma/chroma.py index f720159a5..f8af9ac5c 100644 --- a/llama_stack/providers/adapters/memory/chroma/chroma.py +++ b/llama_stack/providers/adapters/memory/chroma/chroma.py @@ -99,7 +99,7 @@ class ChromaMemoryAdapter(Memory): memory_bank.type == MemoryBankType.vector.value ), f"Only vector banks are supported {memory_bank.type}" - collection = await self.client.create_collection( + collection = await self.client.get_or_create_collection( name=memory_bank.identifier, ) bank_index = BankWithIndex( diff --git a/llama_stack/providers/tests/memory/test_memory.py b/llama_stack/providers/tests/memory/test_memory.py index 4f6dadb14..1e9db2161 100644 --- 
a/llama_stack/providers/tests/memory/test_memory.py +++ b/llama_stack/providers/tests/memory/test_memory.py @@ -22,13 +22,29 @@ async def memory_impl(): @pytest.fixture -def sample_document(): - return MemoryBankDocument( - document_id="doc1", - content="This is a sample document for testing.", - mime_type="text/plain", - metadata={"author": "Test Author"}, - ) +def sample_documents(): + return [ + MemoryBankDocument( + document_id="doc1", + content="Python is a high-level programming language.", + metadata={"category": "programming", "difficulty": "beginner"}, + ), + MemoryBankDocument( + document_id="doc2", + content="Machine learning is a subset of artificial intelligence.", + metadata={"category": "AI", "difficulty": "advanced"}, + ), + MemoryBankDocument( + document_id="doc3", + content="Data structures are fundamental to computer science.", + metadata={"category": "computer science", "difficulty": "intermediate"}, + ), + MemoryBankDocument( + document_id="doc4", + content="Neural networks are inspired by biological neural networks.", + metadata={"category": "AI", "difficulty": "advanced"}, + ), + ] async def register_memory_bank(memory_impl: Memory): @@ -44,17 +60,44 @@ async def register_memory_bank(memory_impl: Memory): @pytest.mark.asyncio -async def test_query_documents(memory_impl, sample_document): +async def test_query_documents(memory_impl, sample_documents): with pytest.raises(ValueError): - await memory_impl.insert_documents("test_bank", [sample_document]) + await memory_impl.insert_documents("test_bank", sample_documents) await register_memory_bank(memory_impl) - await memory_impl.insert_documents("test_bank", [sample_document]) + await memory_impl.insert_documents("test_bank", sample_documents) - query = ["sample ", "document"] - response = await memory_impl.query_documents("test_bank", query) + query1 = "programming language" + response1 = await memory_impl.query_documents("test_bank", query1) + assert_valid_response(response1) + assert any("Python" in chunk.content for chunk in response1.chunks) + # Test case 3: Query with semantic similarity + query3 = "AI and brain-inspired computing" + response3 = await memory_impl.query_documents("test_bank", query3) + assert_valid_response(response3) + assert any("neural networks" in chunk.content.lower() for chunk in response3.chunks) + + # Test case 4: Query with limit on number of results + query4 = "computer" + params4 = {"max_chunks": 2} + response4 = await memory_impl.query_documents("test_bank", query4, params4) + assert_valid_response(response4) + assert len(response4.chunks) <= 2 + + # Test case 5: Query with threshold on similarity score + query5 = "quantum computing" # Not directly related to any document + params5 = {"score_threshold": 0.5} + response5 = await memory_impl.query_documents("test_bank", query5, params5) + assert_valid_response(response5) + assert all(score >= 0.5 for score in response5.scores) + + +def assert_valid_response(response: QueryDocumentsResponse): assert isinstance(response, QueryDocumentsResponse) assert len(response.chunks) > 0 assert len(response.scores) > 0 assert len(response.chunks) == len(response.scores) + for chunk in response.chunks: + assert isinstance(chunk.content, str) + assert chunk.document_id is not None From f8752ab8dcc77cd77e2192e1e6380bbebd06d54a Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 8 Oct 2024 09:54:00 -0700 Subject: [PATCH 54/69] weaviate fixes, test now passes --- .../adapters/memory/weaviate/weaviate.py | 30 ++++++++++++++----- 
.../tests/inference/test_inference.py | 17 +++++++++++ .../providers/tests/memory/test_memory.py | 17 +++++++++++ 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/llama_stack/providers/adapters/memory/weaviate/weaviate.py b/llama_stack/providers/adapters/memory/weaviate/weaviate.py index 573802c84..8f5cafdc5 100644 --- a/llama_stack/providers/adapters/memory/weaviate/weaviate.py +++ b/llama_stack/providers/adapters/memory/weaviate/weaviate.py @@ -3,6 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +import json from typing import Any, Dict, List, Optional @@ -36,7 +37,7 @@ class WeaviateIndex(EmbeddingIndex): data_objects.append( wvc.data.DataObject( properties={ - "chunk_content": chunk, + "chunk_content": chunk.json(), }, vector=embeddings[i].tolist(), ) @@ -44,7 +45,9 @@ class WeaviateIndex(EmbeddingIndex): # Inserting chunks into a prespecified Weaviate collection collection = self.client.collections.get(self.collection_name) - await collection.data.insert_many(data_objects) + + # TODO: make this async friendly + collection.data.insert_many(data_objects) async def query(self, embedding: NDArray, k: int) -> QueryDocumentsResponse: collection = self.client.collections.get(self.collection_name) @@ -52,13 +55,23 @@ class WeaviateIndex(EmbeddingIndex): results = collection.query.near_vector( near_vector=embedding.tolist(), limit=k, - return_meta_data=wvc.query.MetadataQuery(distance=True), + return_metadata=wvc.query.MetadataQuery(distance=True), ) chunks = [] scores = [] for doc in results.objects: - chunk = doc.properties["chunk_content"] + chunk_json = doc.properties["chunk_content"] + try: + chunk_dict = json.loads(chunk_json) + chunk = Chunk(**chunk_dict) + except Exception: + import traceback + + traceback.print_exc() + print(f"Failed to parse document: {chunk_json}") + continue + chunks.append(chunk) scores.append(1.0 / doc.metadata.distance) @@ -102,12 +115,12 @@ class WeaviateMemoryAdapter(Memory, NeedsRequestProviderData): memory_bank.type == MemoryBankType.vector.value ), f"Only vector banks are supported {memory_bank.type}" - client = await self._get_client() + client = self._get_client() # Create collection if it doesn't exist if not client.collections.exists(memory_bank.identifier): client.collections.create( - name=smemory_bank.identifier, + name=memory_bank.identifier, vectorizer_config=wvc.config.Configure.Vectorizer.none(), properties=[ wvc.config.Property( @@ -121,7 +134,7 @@ class WeaviateMemoryAdapter(Memory, NeedsRequestProviderData): bank=memory_bank, index=WeaviateIndex(client=client, collection_name=memory_bank.identifier), ) - self.cache[bank_id] = index + self.cache[memory_bank.identifier] = index async def _get_and_cache_bank_index(self, bank_id: str) -> Optional[BankWithIndex]: if bank_id in self.cache: @@ -131,7 +144,7 @@ class WeaviateMemoryAdapter(Memory, NeedsRequestProviderData): if not bank: raise ValueError(f"Bank {bank_id} not found") - client = await self._get_client() + client = self._get_client() if not client.collections.exists(bank_id): raise ValueError(f"Collection with name `{bank_id}` not found") @@ -146,6 +159,7 @@ class WeaviateMemoryAdapter(Memory, NeedsRequestProviderData): self, bank_id: str, documents: List[MemoryBankDocument], + ttl_seconds: Optional[int] = None, ) -> None: index = await self._get_and_cache_bank_index(bank_id) if not index: diff --git a/llama_stack/providers/tests/inference/test_inference.py 
b/llama_stack/providers/tests/inference/test_inference.py index de8241b20..38b9ff860 100644 --- a/llama_stack/providers/tests/inference/test_inference.py +++ b/llama_stack/providers/tests/inference/test_inference.py @@ -15,6 +15,23 @@ from llama_stack.apis.inference import * # noqa: F403 from llama_stack.distribution.datatypes import * # noqa: F403 from llama_stack.providers.tests.resolver import resolve_impls_for_test +# How to run this test: +# +# 1. Ensure you have a conda environment with the right dependencies installed. This is a bit tricky +# since it depends on the provider you are testing. On top of that you need +# `pytest` and `pytest-asyncio` installed. +# +# 2. Copy and modify the provider_config_example.yaml depending on the provider you are testing. +# +# 3. Run: +# +# ```bash +# PROVIDER_ID= \ +# PROVIDER_CONFIG=provider_config.yaml \ +# pytest -s llama_stack/providers/tests/inference/test_inference.py \ +# --tb=short --disable-warnings +# ``` + def group_chunks(response): return { diff --git a/llama_stack/providers/tests/memory/test_memory.py b/llama_stack/providers/tests/memory/test_memory.py index 1e9db2161..4351ae699 100644 --- a/llama_stack/providers/tests/memory/test_memory.py +++ b/llama_stack/providers/tests/memory/test_memory.py @@ -11,6 +11,23 @@ from llama_stack.apis.memory import * # noqa: F403 from llama_stack.distribution.datatypes import * # noqa: F403 from llama_stack.providers.tests.resolver import resolve_impls_for_test +# How to run this test: +# +# 1. Ensure you have a conda environment with the right dependencies installed. This is a bit tricky +# since it depends on the provider you are testing. On top of that you need +# `pytest` and `pytest-asyncio` installed. +# +# 2. Copy and modify the provider_config_example.yaml depending on the provider you are testing. +# +# 3. Run: +# +# ```bash +# PROVIDER_ID= \ +# PROVIDER_CONFIG=provider_config.yaml \ +# pytest -s llama_stack/providers/tests/memory/test_memory.py \ +# --tb=short --disable-warnings +# ``` + @pytest_asyncio.fixture(scope="session") async def memory_impl(): From 0c9eb3341c025c94b2d18d0463253ed844bf5a54 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 8 Oct 2024 10:52:16 -0700 Subject: [PATCH 55/69] Separate chat_completion stream and non-stream implementations This is a pretty important requirement. The streaming response type is an AsyncGenerator while the non-stream one is a single object. So far this has worked _sometimes_ due to various pre-existing hacks (and in some cases, just failed.)
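For illustration only (not part of this patch), here is a minimal sketch of the dispatch pattern the commit describes, using made-up names (`ToyInference`, `chat_completion`): the public method is a plain `def` that returns either an awaitable single response or an `AsyncGenerator` of chunks, with the two code paths kept in separate `_nonstream_*` / `_stream_*` helpers.

```python
# Hypothetical toy example; names are illustrative, not the real llama-stack API.
import asyncio
from typing import AsyncGenerator, Awaitable, Union


class ToyInference:
    def chat_completion(
        self, prompt: str, stream: bool = False
    ) -> Union[Awaitable[str], AsyncGenerator[str, None]]:
        # Plain `def`, not `async def`: we hand back either the coroutine for a
        # single response or an async generator, depending on `stream`.
        if stream:
            return self._stream_chat_completion(prompt)
        return self._nonstream_chat_completion(prompt)

    async def _nonstream_chat_completion(self, prompt: str) -> str:
        # stream=False: one object, awaited by the caller.
        return f"echo: {prompt}"

    async def _stream_chat_completion(self, prompt: str) -> AsyncGenerator[str, None]:
        # stream=True: an async generator the caller iterates with `async for`.
        for token in prompt.split():
            yield token


async def main() -> None:
    impl = ToyInference()
    print(await impl.chat_completion("hello world"))  # single response
    async for chunk in impl.chat_completion("hello world", stream=True):
        print(chunk)  # streamed chunks


if __name__ == "__main__":
    asyncio.run(main())
```

Callers either `await` the result or `async for` over it, which is the shape the router, client, and provider changes in the diff below all assume.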
--- llama_stack/apis/inference/client.py | 89 +++--- llama_stack/apis/inference/inference.py | 8 +- llama_stack/distribution/routers/routers.py | 23 +- .../adapters/inference/ollama/ollama.py | 256 +++++++++-------- .../meta_reference/inference/inference.py | 257 ++++++++++-------- 5 files changed, 346 insertions(+), 287 deletions(-) diff --git a/llama_stack/apis/inference/client.py b/llama_stack/apis/inference/client.py index 8b822058f..c7b865ebf 100644 --- a/llama_stack/apis/inference/client.py +++ b/llama_stack/apis/inference/client.py @@ -42,10 +42,10 @@ class InferenceClient(Inference): async def shutdown(self) -> None: pass - async def completion(self, request: CompletionRequest) -> AsyncGenerator: + def completion(self, request: CompletionRequest) -> AsyncGenerator: raise NotImplementedError() - async def chat_completion( + def chat_completion( self, model: str, messages: List[Message], @@ -66,48 +66,57 @@ class InferenceClient(Inference): stream=stream, logprobs=logprobs, ) + if stream: + return self._stream_chat_completion(request) + else: + return self._nonstream_chat_completion(request) + + async def _nonstream_chat_completion( + self, request: ChatCompletionRequest + ) -> ChatCompletionResponse: async with httpx.AsyncClient() as client: - if stream: - async with client.stream( - "POST", - f"{self.base_url}/inference/chat_completion", - json=encodable_dict(request), - headers={"Content-Type": "application/json"}, - timeout=20, - ) as response: - if response.status_code != 200: - content = await response.aread() - cprint( - f"Error: HTTP {response.status_code} {content.decode()}", - "red", - ) - return + response = await client.post( + f"{self.base_url}/inference/chat_completion", + json=encodable_dict(request), + headers={"Content-Type": "application/json"}, + timeout=20, + ) - async for line in response.aiter_lines(): - if line.startswith("data:"): - data = line[len("data: ") :] - try: - if "error" in data: - cprint(data, "red") - continue + response.raise_for_status() + j = response.json() + yield ChatCompletionResponse(**j) - yield ChatCompletionResponseStreamChunk( - **json.loads(data) - ) - except Exception as e: - print(data) - print(f"Error with parsing or validation: {e}") - else: - response = await client.post( - f"{self.base_url}/inference/chat_completion", - json=encodable_dict(request), - headers={"Content-Type": "application/json"}, - timeout=20, - ) + async def _stream_chat_completion( + self, request: ChatCompletionRequest + ) -> AsyncGenerator: + async with httpx.AsyncClient() as client: + async with client.stream( + "POST", + f"{self.base_url}/inference/chat_completion", + json=encodable_dict(request), + headers={"Content-Type": "application/json"}, + timeout=20, + ) as response: + if response.status_code != 200: + content = await response.aread() + cprint( + f"Error: HTTP {response.status_code} {content.decode()}", + "red", + ) + return - response.raise_for_status() - j = response.json() - yield ChatCompletionResponse(**j) + async for line in response.aiter_lines(): + if line.startswith("data:"): + data = line[len("data: ") :] + try: + if "error" in data: + cprint(data, "red") + continue + + yield ChatCompletionResponseStreamChunk(**json.loads(data)) + except Exception as e: + print(data) + print(f"Error with parsing or validation: {e}") async def run_main( diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index 7ff70a2af..13a51bc59 100644 --- a/llama_stack/apis/inference/inference.py +++ 
b/llama_stack/apis/inference/inference.py @@ -180,8 +180,10 @@ class ModelStore(Protocol): class Inference(Protocol): model_store: ModelStore + # This method is not `async def` because it can result in either an + # `AsyncGenerator` or a `CompletionResponse` depending on the value of `stream`. @webmethod(route="/inference/completion") - async def completion( + def completion( self, model: str, content: InterleavedTextMedia, @@ -190,8 +192,10 @@ class Inference(Protocol): logprobs: Optional[LogProbConfig] = None, ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ... + # This method is not `async def` because it can result in either an + # `AsyncGenerator` or a `ChatCompletionResponse` depending on the value of `stream`. @webmethod(route="/inference/chat_completion") - async def chat_completion( + def chat_completion( self, model: str, messages: List[Message], diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py index 361cee3f3..cf62da1d0 100644 --- a/llama_stack/distribution/routers/routers.py +++ b/llama_stack/distribution/routers/routers.py @@ -70,7 +70,7 @@ class InferenceRouter(Inference): async def register_model(self, model: ModelDef) -> None: await self.routing_table.register_model(model) - async def chat_completion( + def chat_completion( self, model: str, messages: List[Message], @@ -91,27 +91,32 @@ class InferenceRouter(Inference): stream=stream, logprobs=logprobs, ) - # TODO: we need to fix streaming response to align provider implementations with Protocol. - async for chunk in self.routing_table.get_provider_impl(model).chat_completion( - **params - ): - yield chunk + provider = self.routing_table.get_provider_impl(model) + if stream: + return (chunk async for chunk in provider.chat_completion(**params)) + else: + return provider.chat_completion(**params) - async def completion( + def completion( self, model: str, content: InterleavedTextMedia, sampling_params: Optional[SamplingParams] = SamplingParams(), stream: Optional[bool] = False, logprobs: Optional[LogProbConfig] = None, - ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: - return await self.routing_table.get_provider_impl(model).completion( + ) -> AsyncGenerator: + provider = self.routing_table.get_provider_impl(model) + params = dict( model=model, content=content, sampling_params=sampling_params, stream=stream, logprobs=logprobs, ) + if stream: + return (chunk async for chunk in provider.completion(**params)) + else: + return provider.completion(**params) async def embeddings( self, diff --git a/llama_stack/providers/adapters/inference/ollama/ollama.py b/llama_stack/providers/adapters/inference/ollama/ollama.py index 40a3f5977..80d2ad4c8 100644 --- a/llama_stack/providers/adapters/inference/ollama/ollama.py +++ b/llama_stack/providers/adapters/inference/ollama/ollama.py @@ -55,7 +55,7 @@ class OllamaInferenceAdapter(ModelRegistryHelper, Inference): async def shutdown(self) -> None: pass - async def completion( + def completion( self, model: str, content: InterleavedTextMedia, @@ -79,7 +79,7 @@ class OllamaInferenceAdapter(ModelRegistryHelper, Inference): return options - async def chat_completion( + def chat_completion( self, model: str, messages: List[Message], @@ -90,24 +90,7 @@ class OllamaInferenceAdapter(ModelRegistryHelper, Inference): stream: Optional[bool] = False, logprobs: Optional[LogProbConfig] = None, ) -> AsyncGenerator: - request = ChatCompletionRequest( - model=model, - messages=messages, - sampling_params=sampling_params, - 
tools=tools or [], - tool_choice=tool_choice, - tool_prompt_format=tool_prompt_format, - stream=stream, - logprobs=logprobs, - ) - - messages = augment_messages_for_tools(request) - model_input = self.formatter.encode_dialog_prompt(messages) - prompt = self.tokenizer.decode(model_input.tokens) - - # accumulate sampling params and other options to pass to ollama - options = self.get_ollama_chat_options(request) - ollama_model = self.map_to_provider_model(request.model) + ollama_model = self.map_to_provider_model(model) res = await self.client.ps() need_model_pull = True @@ -123,133 +106,166 @@ class OllamaInferenceAdapter(ModelRegistryHelper, Inference): status["status"] == "success" ), f"Failed to pull model {self.model} in ollama" - common_params = { - "model": ollama_model, + request = ChatCompletionRequest( + model=model, + messages=messages, + sampling_params=sampling_params, + tools=tools or [], + tool_choice=tool_choice, + tool_prompt_format=tool_prompt_format, + stream=stream, + logprobs=logprobs, + ) + + if stream: + return self._stream_chat_completion(request) + else: + return self._nonstream_chat_completion(request) + + def _get_params(self, request: ChatCompletionRequest) -> dict: + messages = augment_messages_for_tools(request) + model_input = self.formatter.encode_dialog_prompt(messages) + prompt = self.tokenizer.decode(model_input.tokens) + + # accumulate sampling params and other options to pass to ollama + options = self.get_ollama_chat_options(request) + + return { + "model": self.map_to_provider_model(request.model), "prompt": prompt, "options": options, "raw": True, "stream": request.stream, } - if not request.stream: - r = await self.client.generate(**common_params) - stop_reason = None - if r["done"]: - if r["done_reason"] == "stop": + async def _nonstream_chat_completion( + self, request: ChatCompletionRequest + ) -> ChatCompletionResponse: + params = self._get_params(request) + r = await self.client.generate(**params) + stop_reason = None + if r["done"]: + if r["done_reason"] == "stop": + stop_reason = StopReason.end_of_turn + elif r["done_reason"] == "length": + stop_reason = StopReason.out_of_tokens + + completion_message = self.formatter.decode_assistant_message_from_content( + r["response"], stop_reason + ) + return ChatCompletionResponse( + completion_message=completion_message, + logprobs=None, + ) + + async def _stream_chat_completion( + self, request: ChatCompletionRequest + ) -> AsyncGenerator: + params = self._get_params(request) + + stream = await self.client.generate(**params) + + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.start, + delta="", + ) + ) + + buffer = "" + ipython = False + stop_reason = None + + async for chunk in stream: + if chunk["done"]: + if stop_reason is None and chunk["done_reason"] == "stop": stop_reason = StopReason.end_of_turn - elif r["done_reason"] == "length": + elif stop_reason is None and chunk["done_reason"] == "length": stop_reason = StopReason.out_of_tokens + break - completion_message = self.formatter.decode_assistant_message_from_content( - r["response"], stop_reason - ) - yield ChatCompletionResponse( - completion_message=completion_message, - logprobs=None, - ) - else: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.start, - delta="", - ) - ) - stream = await self.client.generate(**common_params) - - buffer = "" - ipython = False - stop_reason = None - - async for 
chunk in stream: - if chunk["done"]: - if stop_reason is None and chunk["done_reason"] == "stop": - stop_reason = StopReason.end_of_turn - elif stop_reason is None and chunk["done_reason"] == "length": - stop_reason = StopReason.out_of_tokens - break - - text = chunk["response"] - # check if its a tool call ( aka starts with <|python_tag|> ) - if not ipython and text.startswith("<|python_tag|>"): - ipython = True - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content="", - parse_status=ToolCallParseStatus.started, - ), - ) - ) - buffer += text - continue - - if ipython: - if text == "<|eot_id|>": - stop_reason = StopReason.end_of_turn - text = "" - continue - elif text == "<|eom_id|>": - stop_reason = StopReason.end_of_message - text = "" - continue - - buffer += text - delta = ToolCallDelta( - content=text, - parse_status=ToolCallParseStatus.in_progress, - ) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=delta, - stop_reason=stop_reason, - ) - ) - else: - buffer += text - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=text, - stop_reason=stop_reason, - ) - ) - - # parse tool calls and report errors - message = self.formatter.decode_assistant_message_from_content( - buffer, stop_reason - ) - parsed_tool_calls = len(message.tool_calls) > 0 - if ipython and not parsed_tool_calls: + text = chunk["response"] + # check if its a tool call ( aka starts with <|python_tag|> ) + if not ipython and text.startswith("<|python_tag|>"): + ipython = True yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( event_type=ChatCompletionResponseEventType.progress, delta=ToolCallDelta( content="", - parse_status=ToolCallParseStatus.failure, + parse_status=ToolCallParseStatus.started, ), - stop_reason=stop_reason, ) ) + buffer += text + continue + + if ipython: + if text == "<|eot_id|>": + stop_reason = StopReason.end_of_turn + text = "" + continue + elif text == "<|eom_id|>": + stop_reason = StopReason.end_of_message + text = "" + continue + + buffer += text + delta = ToolCallDelta( + content=text, + parse_status=ToolCallParseStatus.in_progress, + ) - for tool_call in message.tool_calls: yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content=tool_call, - parse_status=ToolCallParseStatus.success, - ), + delta=delta, + stop_reason=stop_reason, + ) + ) + else: + buffer += text + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=text, stop_reason=stop_reason, ) ) + # parse tool calls and report errors + message = self.formatter.decode_assistant_message_from_content( + buffer, stop_reason + ) + parsed_tool_calls = len(message.tool_calls) > 0 + if ipython and not parsed_tool_calls: yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.complete, - delta="", + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + content="", + parse_status=ToolCallParseStatus.failure, + ), stop_reason=stop_reason, ) ) + + for tool_call in message.tool_calls: + yield ChatCompletionResponseStreamChunk( + 
event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + content=tool_call, + parse_status=ToolCallParseStatus.success, + ), + stop_reason=stop_reason, + ) + ) + + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.complete, + delta="", + stop_reason=stop_reason, + ) + ) diff --git a/llama_stack/providers/impls/meta_reference/inference/inference.py b/llama_stack/providers/impls/meta_reference/inference/inference.py index a310a479a..ad8cc31fd 100644 --- a/llama_stack/providers/impls/meta_reference/inference/inference.py +++ b/llama_stack/providers/impls/meta_reference/inference/inference.py @@ -46,9 +46,7 @@ class MetaReferenceInferenceImpl(Inference): async def shutdown(self) -> None: self.generator.stop() - # hm, when stream=False, we should not be doing SSE :/ which is what the - # top-level server is going to do. make the typing more specific here - async def chat_completion( + def chat_completion( self, model: str, messages: List[Message], @@ -59,6 +57,9 @@ class MetaReferenceInferenceImpl(Inference): stream: Optional[bool] = False, logprobs: Optional[LogProbConfig] = None, ) -> AsyncGenerator: + if logprobs: + assert logprobs.top_k == 1, f"Unexpected top_k={logprobs.top_k}" + # wrapper request to make it easier to pass around (internal only, not exposed to API) request = ChatCompletionRequest( model=model, @@ -71,7 +72,6 @@ class MetaReferenceInferenceImpl(Inference): logprobs=logprobs, ) - messages = augment_messages_for_tools(request) model = resolve_model(request.model) if model is None: raise RuntimeError( @@ -87,138 +87,163 @@ class MetaReferenceInferenceImpl(Inference): async with SEMAPHORE: if request.stream: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.start, - delta="", + return self._stream_chat_completion(request) + else: + return self._nonstream_chat_completion(request) + + async def _nonstream_chat_completion( + self, request: ChatCompletionRequest + ) -> ChatCompletionResponse: + messages = augment_messages_for_tools(request) + + tokens = [] + logprobs = [] + stop_reason = None + + for token_result in self.generator.chat_completion( + messages=messages, + temperature=request.sampling_params.temperature, + top_p=request.sampling_params.top_p, + max_gen_len=request.sampling_params.max_tokens, + logprobs=request.logprobs, + tool_prompt_format=request.tool_prompt_format, + ): + tokens.append(token_result.token) + + if token_result.text == "<|eot_id|>": + stop_reason = StopReason.end_of_turn + elif token_result.text == "<|eom_id|>": + stop_reason = StopReason.end_of_message + + if request.logprobs: + assert len(token_result.logprobs) == 1 + + logprobs.append( + TokenLogProbs( + logprobs_by_token={token_result.text: token_result.logprobs[0]} ) ) - tokens = [] - logprobs = [] + if stop_reason is None: + stop_reason = StopReason.out_of_tokens - stop_reason = None + message = self.generator.formatter.decode_assistant_message(tokens, stop_reason) + return ChatCompletionResponse( + completion_message=message, + logprobs=logprobs if request.logprobs else None, + ) - buffer = "" - ipython = False + async def _stream_chat_completion( + self, request: ChatCompletionRequest + ) -> AsyncGenerator: + messages = augment_messages_for_tools(request) - for token_result in self.generator.chat_completion( - messages=messages, - temperature=request.sampling_params.temperature, - 
top_p=request.sampling_params.top_p, - max_gen_len=request.sampling_params.max_tokens, - logprobs=request.logprobs, - tool_prompt_format=request.tool_prompt_format, - ): - buffer += token_result.text - tokens.append(token_result.token) + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.start, + delta="", + ) + ) - if not ipython and buffer.startswith("<|python_tag|>"): - ipython = True - if request.stream: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content="", - parse_status=ToolCallParseStatus.started, - ), - ) - ) + tokens = [] + logprobs = [] + stop_reason = None + ipython = False - buffer = buffer[len("<|python_tag|>") :] - continue + for token_result in self.generator.chat_completion( + messages=messages, + temperature=request.sampling_params.temperature, + top_p=request.sampling_params.top_p, + max_gen_len=request.sampling_params.max_tokens, + logprobs=request.logprobs, + tool_prompt_format=request.tool_prompt_format, + ): + tokens.append(token_result.token) - if not request.stream: - if request.logprobs: - assert ( - len(token_result.logprobs) == 1 - ), "Expected logprob to contain 1 result for the current token" - assert ( - request.logprobs.top_k == 1 - ), "Only top_k=1 is supported for LogProbConfig" - - logprobs.append( - TokenLogProbs( - logprobs_by_token={ - token_result.text: token_result.logprobs[0] - } - ) - ) - - continue - - if token_result.text == "<|eot_id|>": - stop_reason = StopReason.end_of_turn - text = "" - elif token_result.text == "<|eom_id|>": - stop_reason = StopReason.end_of_message - text = "" - else: - text = token_result.text - - if ipython: - delta = ToolCallDelta( - content=text, - parse_status=ToolCallParseStatus.in_progress, + if not ipython and token_result.text.startswith("<|python_tag|>"): + ipython = True + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + content="", + parse_status=ToolCallParseStatus.started, + ), ) - else: - delta = text + ) + continue - if stop_reason is None: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=delta, - stop_reason=stop_reason, - ) - ) + if token_result.text == "<|eot_id|>": + stop_reason = StopReason.end_of_turn + text = "" + elif token_result.text == "<|eom_id|>": + stop_reason = StopReason.end_of_message + text = "" + else: + text = token_result.text + + if ipython: + delta = ToolCallDelta( + content=text, + parse_status=ToolCallParseStatus.in_progress, + ) + else: + delta = text if stop_reason is None: - stop_reason = StopReason.out_of_tokens + if request.logprobs: + assert len(token_result.logprobs) == 1 - # TODO(ashwin): parse tool calls separately here and report errors? 
- # if someone breaks the iteration before coming here we are toast - message = self.generator.formatter.decode_assistant_message( - tokens, stop_reason - ) - if request.stream: - parsed_tool_calls = len(message.tool_calls) > 0 - if ipython and not parsed_tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content="", - parse_status=ToolCallParseStatus.failure, - ), - stop_reason=stop_reason, + logprobs.append( + TokenLogProbs( + logprobs_by_token={ + token_result.text: token_result.logprobs[0] + } ) ) - - for tool_call in message.tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content=tool_call, - parse_status=ToolCallParseStatus.success, - ), - stop_reason=stop_reason, - ) - ) - yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.complete, - delta="", + event_type=ChatCompletionResponseEventType.progress, + delta=delta, stop_reason=stop_reason, + logprobs=logprobs if request.logprobs else None, ) ) - # TODO(ashwin): what else do we need to send out here when everything finishes? - else: - yield ChatCompletionResponse( - completion_message=message, - logprobs=logprobs if request.logprobs else None, + if stop_reason is None: + stop_reason = StopReason.out_of_tokens + + message = self.generator.formatter.decode_assistant_message(tokens, stop_reason) + + parsed_tool_calls = len(message.tool_calls) > 0 + if ipython and not parsed_tool_calls: + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + content="", + parse_status=ToolCallParseStatus.failure, + ), + stop_reason=stop_reason, ) + ) + + for tool_call in message.tool_calls: + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + content=tool_call, + parse_status=ToolCallParseStatus.success, + ), + stop_reason=stop_reason, + ) + ) + + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.complete, + delta="", + stop_reason=stop_reason, + ) + ) From 05e73d12b3940bf5dad4692f9b916dd94941b5b0 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 8 Oct 2024 12:15:55 -0700 Subject: [PATCH 56/69] introduce openai_compat with the completions (not chat-completions) API This keeps the prompt encoding layer in our control (see `chat_completion_request_to_prompt()` method) --- .../adapters/inference/fireworks/fireworks.py | 212 ++++------------- .../adapters/inference/ollama/ollama.py | 221 +++++------------- .../adapters/inference/together/together.py | 193 ++++----------- .../tests/inference/test_inference.py | 44 ++-- .../utils/inference/augment_messages.py | 10 +- .../utils/inference/openai_compat.py | 187 +++++++++++++++ 6 files changed, 354 insertions(+), 513 deletions(-) create mode 100644 llama_stack/providers/utils/inference/openai_compat.py diff --git a/llama_stack/providers/adapters/inference/fireworks/fireworks.py b/llama_stack/providers/adapters/inference/fireworks/fireworks.py index 654cd345c..ce57480a0 100644 --- a/llama_stack/providers/adapters/inference/fireworks/fireworks.py +++ b/llama_stack/providers/adapters/inference/fireworks/fireworks.py @@ 
-10,14 +10,19 @@ from fireworks.client import Fireworks from llama_models.llama3.api.chat_format import ChatFormat -from llama_models.llama3.api.datatypes import Message, StopReason +from llama_models.llama3.api.datatypes import Message from llama_models.llama3.api.tokenizer import Tokenizer -from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper - from llama_stack.apis.inference import * # noqa: F403 + from llama_stack.providers.utils.inference.augment_messages import ( - augment_messages_for_tools, + chat_completion_request_to_prompt, +) +from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper +from llama_stack.providers.utils.inference.openai_compat import ( + get_sampling_options, + process_chat_completion_response, + process_chat_completion_stream_response, ) from .config import FireworksImplConfig @@ -38,12 +43,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference): self, stack_to_provider_models_map=FIREWORKS_SUPPORTED_MODELS ) self.config = config - self.tokenizer = Tokenizer.get_instance() - self.formatter = ChatFormat(self.tokenizer) - - @property - def client(self) -> Fireworks: - return Fireworks(api_key=self.config.api_key) + self.formatter = ChatFormat(Tokenizer.get_instance()) async def initialize(self) -> None: return @@ -51,7 +51,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference): async def shutdown(self) -> None: pass - async def completion( + def completion( self, model: str, content: InterleavedTextMedia, @@ -61,16 +61,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference): ) -> AsyncGenerator: raise NotImplementedError() - def get_fireworks_chat_options(self, request: ChatCompletionRequest) -> dict: - options = {} - if request.sampling_params is not None: - for attr in {"temperature", "top_p", "top_k", "max_tokens"}: - if getattr(request.sampling_params, attr): - options[attr] = getattr(request.sampling_params, attr) - - return options - - async def chat_completion( + def chat_completion( self, model: str, messages: List[Message], @@ -92,154 +83,41 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference): logprobs=logprobs, ) - messages = augment_messages_for_tools(request) - model_input = self.formatter.encode_dialog_prompt(messages) - prompt = self.tokenizer.decode(model_input.tokens) + client = Fireworks(api_key=self.config.api_key) + if stream: + return self._stream_chat_completion(request, client) + else: + return self._nonstream_chat_completion(request, client) + + async def _nonstream_chat_completion( + self, request: ChatCompletionRequest, client: Fireworks + ) -> ChatCompletionResponse: + params = self._get_params(request) + r = await client.completion.acreate(**params) + return process_chat_completion_response(request, r, self.formatter) + + async def _stream_chat_completion( + self, request: ChatCompletionRequest, client: Fireworks + ) -> AsyncGenerator: + params = self._get_params(request) + + stream = client.completion.acreate(**params) + async for chunk in process_chat_completion_stream_response( + request, stream, self.formatter + ): + yield chunk + + def _get_params(self, request: ChatCompletionRequest) -> dict: + prompt = chat_completion_request_to_prompt(request, self.formatter) # Fireworks always prepends with BOS if prompt.startswith("<|begin_of_text|>"): prompt = prompt[len("<|begin_of_text|>") :] - # accumulate sampling params and other options to pass to fireworks - options = self.get_fireworks_chat_options(request) + options = 
get_sampling_options(request) options.setdefault("max_tokens", 512) - - fireworks_model = self.map_to_provider_model(request.model) - - if not request.stream: - r = await self.client.completion.acreate( - model=fireworks_model, - prompt=prompt, - stream=False, - **options, - ) - stop_reason = None - if r.choices[0].finish_reason: - if r.choices[0].finish_reason == "stop": - stop_reason = StopReason.end_of_turn - elif r.choices[0].finish_reason == "length": - stop_reason = StopReason.out_of_tokens - - completion_message = self.formatter.decode_assistant_message_from_content( - r.choices[0].text, stop_reason - ) - - yield ChatCompletionResponse( - completion_message=completion_message, - logprobs=None, - ) - else: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.start, - delta="", - ) - ) - - buffer = "" - ipython = False - stop_reason = None - - async for chunk in self.client.completion.acreate( - model=fireworks_model, - prompt=prompt, - stream=True, - **options, - ): - if chunk.choices[0].finish_reason: - if stop_reason is None and chunk.choices[0].finish_reason == "stop": - stop_reason = StopReason.end_of_turn - elif ( - stop_reason is None - and chunk.choices[0].finish_reason == "length" - ): - stop_reason = StopReason.out_of_tokens - break - - text = chunk.choices[0].text - if text is None: - continue - - # check if its a tool call ( aka starts with <|python_tag|> ) - if not ipython and text.startswith("<|python_tag|>"): - ipython = True - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content="", - parse_status=ToolCallParseStatus.started, - ), - ) - ) - buffer += text - continue - - if ipython: - if text == "<|eot_id|>": - stop_reason = StopReason.end_of_turn - text = "" - continue - elif text == "<|eom_id|>": - stop_reason = StopReason.end_of_message - text = "" - continue - - buffer += text - delta = ToolCallDelta( - content=text, - parse_status=ToolCallParseStatus.in_progress, - ) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=delta, - stop_reason=stop_reason, - ) - ) - else: - buffer += text - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=text, - stop_reason=stop_reason, - ) - ) - - # parse tool calls and report errors - message = self.formatter.decode_assistant_message_from_content( - buffer, stop_reason - ) - parsed_tool_calls = len(message.tool_calls) > 0 - if ipython and not parsed_tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content="", - parse_status=ToolCallParseStatus.failure, - ), - stop_reason=stop_reason, - ) - ) - - for tool_call in message.tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content=tool_call, - parse_status=ToolCallParseStatus.success, - ), - stop_reason=stop_reason, - ) - ) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.complete, - delta="", - stop_reason=stop_reason, - ) - ) + return { + "model": self.map_to_provider_model(request.model), + "prompt": 
prompt, + "stream": request.stream, + **options, + } diff --git a/llama_stack/providers/adapters/inference/ollama/ollama.py b/llama_stack/providers/adapters/inference/ollama/ollama.py index 80d2ad4c8..86d72ca7f 100644 --- a/llama_stack/providers/adapters/inference/ollama/ollama.py +++ b/llama_stack/providers/adapters/inference/ollama/ollama.py @@ -9,17 +9,22 @@ from typing import AsyncGenerator import httpx from llama_models.llama3.api.chat_format import ChatFormat -from llama_models.llama3.api.datatypes import Message, StopReason +from llama_models.llama3.api.datatypes import Message from llama_models.llama3.api.tokenizer import Tokenizer from ollama import AsyncClient from llama_stack.apis.inference import * # noqa: F403 from llama_stack.providers.utils.inference.augment_messages import ( - augment_messages_for_tools, + chat_completion_request_to_prompt, +) +from llama_stack.providers.utils.inference.openai_compat import ( + get_sampling_options, + OpenAICompatCompletionChoice, + OpenAICompatCompletionResponse, + process_chat_completion_response, + process_chat_completion_stream_response, ) -from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper - OLLAMA_SUPPORTED_MODELS = { "Llama3.1-8B-Instruct": "llama3.1:8b-instruct-fp16", @@ -30,14 +35,10 @@ OLLAMA_SUPPORTED_MODELS = { } -class OllamaInferenceAdapter(ModelRegistryHelper, Inference): +class OllamaInferenceAdapter(Inference): def __init__(self, url: str) -> None: - ModelRegistryHelper.__init__( - self, stack_to_provider_models_map=OLLAMA_SUPPORTED_MODELS - ) self.url = url - self.tokenizer = Tokenizer.get_instance() - self.formatter = ChatFormat(self.tokenizer) + self.formatter = ChatFormat(Tokenizer.get_instance()) @property def client(self) -> AsyncClient: @@ -55,6 +56,28 @@ class OllamaInferenceAdapter(ModelRegistryHelper, Inference): async def shutdown(self) -> None: pass + async def register_model(self, model: ModelDef) -> None: + if model.identifier not in OLLAMA_SUPPORTED_MODELS: + raise ValueError( + f"Unsupported model {model.identifier}. 
Supported models: {OLLAMA_SUPPORTED_MODELS.keys()}" + ) + + ollama_model = OLLAMA_SUPPORTED_MODELS[model.identifier] + res = await self.client.ps() + need_model_pull = True + for r in res["models"]: + if ollama_model == r["model"]: + need_model_pull = False + break + + print(f"Ollama model `{ollama_model}` needs pull -> {need_model_pull}") + if need_model_pull: + print(f"Pulling model: {ollama_model}") + status = await self.client.pull(ollama_model) + assert ( + status["status"] == "success" + ), f"Failed to pull model {self.model} in ollama" + def completion( self, model: str, @@ -65,20 +88,6 @@ class OllamaInferenceAdapter(ModelRegistryHelper, Inference): ) -> AsyncGenerator: raise NotImplementedError() - def get_ollama_chat_options(self, request: ChatCompletionRequest) -> dict: - options = {} - if request.sampling_params is not None: - for attr in {"temperature", "top_p", "top_k", "max_tokens"}: - if getattr(request.sampling_params, attr): - options[attr] = getattr(request.sampling_params, attr) - if ( - request.sampling_params.repetition_penalty is not None - and request.sampling_params.repetition_penalty != 1.0 - ): - options["repeat_penalty"] = request.sampling_params.repetition_penalty - - return options - def chat_completion( self, model: str, @@ -90,22 +99,6 @@ class OllamaInferenceAdapter(ModelRegistryHelper, Inference): stream: Optional[bool] = False, logprobs: Optional[LogProbConfig] = None, ) -> AsyncGenerator: - ollama_model = self.map_to_provider_model(model) - - res = await self.client.ps() - need_model_pull = True - for r in res["models"]: - if ollama_model == r["model"]: - need_model_pull = False - break - - if need_model_pull: - print(f"Pulling model: {ollama_model}") - status = await self.client.pull(ollama_model) - assert ( - status["status"] == "success" - ), f"Failed to pull model {self.model} in ollama" - request = ChatCompletionRequest( model=model, messages=messages, @@ -116,24 +109,16 @@ class OllamaInferenceAdapter(ModelRegistryHelper, Inference): stream=stream, logprobs=logprobs, ) - if stream: return self._stream_chat_completion(request) else: return self._nonstream_chat_completion(request) def _get_params(self, request: ChatCompletionRequest) -> dict: - messages = augment_messages_for_tools(request) - model_input = self.formatter.encode_dialog_prompt(messages) - prompt = self.tokenizer.decode(model_input.tokens) - - # accumulate sampling params and other options to pass to ollama - options = self.get_ollama_chat_options(request) - return { - "model": self.map_to_provider_model(request.model), - "prompt": prompt, - "options": options, + "model": OLLAMA_SUPPORTED_MODELS[request.model], + "prompt": chat_completion_request_to_prompt(request, self.formatter), + "options": get_sampling_options(request), "raw": True, "stream": request.stream, } @@ -143,129 +128,35 @@ class OllamaInferenceAdapter(ModelRegistryHelper, Inference): ) -> ChatCompletionResponse: params = self._get_params(request) r = await self.client.generate(**params) - stop_reason = None - if r["done"]: - if r["done_reason"] == "stop": - stop_reason = StopReason.end_of_turn - elif r["done_reason"] == "length": - stop_reason = StopReason.out_of_tokens + assert isinstance(r, dict) - completion_message = self.formatter.decode_assistant_message_from_content( - r["response"], stop_reason + choice = OpenAICompatCompletionChoice( + finish_reason=r["done_reason"] if r["done"] else None, + text=r["response"], ) - return ChatCompletionResponse( - completion_message=completion_message, - logprobs=None, + response 
= OpenAICompatCompletionResponse( + choices=[choice], ) + return process_chat_completion_response(request, response, self.formatter) async def _stream_chat_completion( self, request: ChatCompletionRequest ) -> AsyncGenerator: params = self._get_params(request) - stream = await self.client.generate(**params) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.start, - delta="", - ) - ) - - buffer = "" - ipython = False - stop_reason = None - - async for chunk in stream: - if chunk["done"]: - if stop_reason is None and chunk["done_reason"] == "stop": - stop_reason = StopReason.end_of_turn - elif stop_reason is None and chunk["done_reason"] == "length": - stop_reason = StopReason.out_of_tokens - break - - text = chunk["response"] - # check if its a tool call ( aka starts with <|python_tag|> ) - if not ipython and text.startswith("<|python_tag|>"): - ipython = True - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content="", - parse_status=ToolCallParseStatus.started, - ), - ) + async def _generate_and_convert_to_openai_compat(): + s = await self.client.generate(**params) + async for chunk in s: + choice = OpenAICompatCompletionChoice( + finish_reason=chunk["done_reason"] if chunk["done"] else None, + text=chunk["response"], ) - buffer += text - continue - - if ipython: - if text == "<|eot_id|>": - stop_reason = StopReason.end_of_turn - text = "" - continue - elif text == "<|eom_id|>": - stop_reason = StopReason.end_of_message - text = "" - continue - - buffer += text - delta = ToolCallDelta( - content=text, - parse_status=ToolCallParseStatus.in_progress, + yield OpenAICompatCompletionResponse( + choices=[choice], ) - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=delta, - stop_reason=stop_reason, - ) - ) - else: - buffer += text - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=text, - stop_reason=stop_reason, - ) - ) - - # parse tool calls and report errors - message = self.formatter.decode_assistant_message_from_content( - buffer, stop_reason - ) - parsed_tool_calls = len(message.tool_calls) > 0 - if ipython and not parsed_tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content="", - parse_status=ToolCallParseStatus.failure, - ), - stop_reason=stop_reason, - ) - ) - - for tool_call in message.tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content=tool_call, - parse_status=ToolCallParseStatus.success, - ), - stop_reason=stop_reason, - ) - ) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.complete, - delta="", - stop_reason=stop_reason, - ) - ) + stream = _generate_and_convert_to_openai_compat() + async for chunk in process_chat_completion_stream_response( + request, stream, self.formatter + ): + yield chunk diff --git a/llama_stack/providers/adapters/inference/together/together.py b/llama_stack/providers/adapters/inference/together/together.py index 5326d83d4..d9a9ae491 100644 --- 
a/llama_stack/providers/adapters/inference/together/together.py +++ b/llama_stack/providers/adapters/inference/together/together.py @@ -8,7 +8,7 @@ from typing import AsyncGenerator from llama_models.llama3.api.chat_format import ChatFormat -from llama_models.llama3.api.datatypes import Message, StopReason +from llama_models.llama3.api.datatypes import Message from llama_models.llama3.api.tokenizer import Tokenizer from together import Together @@ -16,9 +16,14 @@ from together import Together from llama_stack.apis.inference import * # noqa: F403 from llama_stack.distribution.request_headers import NeedsRequestProviderData from llama_stack.providers.utils.inference.augment_messages import ( - augment_messages_for_tools, + chat_completion_request_to_prompt, ) from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper +from llama_stack.providers.utils.inference.openai_compat import ( + get_sampling_options, + process_chat_completion_response, + process_chat_completion_stream_response, +) from .config import TogetherImplConfig @@ -41,8 +46,7 @@ class TogetherInferenceAdapter( self, stack_to_provider_models_map=TOGETHER_SUPPORTED_MODELS ) self.config = config - self.tokenizer = Tokenizer.get_instance() - self.formatter = ChatFormat(self.tokenizer) + self.formatter = ChatFormat(Tokenizer.get_instance()) @property def client(self) -> Together: @@ -64,16 +68,7 @@ class TogetherInferenceAdapter( ) -> AsyncGenerator: raise NotImplementedError() - def get_together_chat_options(self, request: ChatCompletionRequest) -> dict: - options = {} - if request.sampling_params is not None: - for attr in {"temperature", "top_p", "top_k", "max_tokens"}: - if getattr(request.sampling_params, attr): - options[attr] = getattr(request.sampling_params, attr) - - return options - - async def chat_completion( + def chat_completion( self, model: str, messages: List[Message], @@ -84,7 +79,6 @@ class TogetherInferenceAdapter( stream: Optional[bool] = False, logprobs: Optional[LogProbConfig] = None, ) -> AsyncGenerator: - together_api_key = None if self.config.api_key is not None: together_api_key = self.config.api_key @@ -109,148 +103,39 @@ class TogetherInferenceAdapter( logprobs=logprobs, ) - # accumulate sampling params and other options to pass to together - options = self.get_together_chat_options(request) - together_model = self.map_to_provider_model(request.model) - messages = augment_messages_for_tools(request) - model_input = self.formatter.encode_dialog_prompt(messages) - prompt = self.tokenizer.decode(model_input.tokens) - - if not request.stream: - # TODO: might need to add back an async here - r = client.completions.create( - model=together_model, - prompt=prompt, - stream=False, - **options, - ) - stop_reason = None - choice = r.choices[0] - if choice.finish_reason: - if choice.finish_reason in ["stop", "eos"]: - stop_reason = StopReason.end_of_turn - stop_reason = StopReason.end_of_turn - elif choice.finish_reason == "length": - stop_reason = StopReason.out_of_tokens - - completion_message = self.formatter.decode_assistant_message_from_content( - choice.text, stop_reason - ) - yield ChatCompletionResponse( - completion_message=completion_message, - logprobs=None, - ) + if stream: + return self._stream_chat_completion(request, client) else: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.start, - delta="", - ) - ) + return self._nonstream_chat_completion(request, client) - buffer = "" - ipython = False - 
stop_reason = None + async def _nonstream_chat_completion( + self, request: ChatCompletionRequest, client: Together + ) -> ChatCompletionResponse: + params = self._get_params(request) + r = client.completions.create(**params) + return process_chat_completion_response(request, r, self.formatter) - for chunk in client.completions.create( - model=together_model, - prompt=prompt, - stream=True, - **options, - ): - choice = chunk.choices[0] - if finish_reason := choice.finish_reason: - if stop_reason is None and finish_reason in ["stop", "eos"]: - stop_reason = StopReason.end_of_turn - elif stop_reason is None and finish_reason == "length": - stop_reason = StopReason.out_of_tokens - break + async def _stream_chat_completion( + self, request: ChatCompletionRequest, client: Together + ) -> AsyncGenerator: + params = self._get_params(request) - text = choice.delta.content - if text is None: - continue + # if we shift to TogetherAsyncClient, we won't need this wrapper + async def _to_async_generator(): + s = client.completions.create(**params) + for chunk in s: + yield chunk - # check if its a tool call ( aka starts with <|python_tag|> ) - if not ipython and text.startswith("<|python_tag|>"): - ipython = True - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content="", - parse_status=ToolCallParseStatus.started, - ), - ) - ) - buffer += text - continue + stream = _to_async_generator() + async for chunk in process_chat_completion_stream_response( + request, stream, self.formatter + ): + yield chunk - if ipython: - if text == "<|eot_id|>": - stop_reason = StopReason.end_of_turn - text = "" - continue - elif text == "<|eom_id|>": - stop_reason = StopReason.end_of_message - text = "" - continue - - buffer += text - delta = ToolCallDelta( - content=text, - parse_status=ToolCallParseStatus.in_progress, - ) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=delta, - stop_reason=stop_reason, - ) - ) - else: - buffer += text - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=text, - stop_reason=stop_reason, - ) - ) - - # parse tool calls and report errors - message = self.formatter.decode_assistant_message_from_content( - buffer, stop_reason - ) - parsed_tool_calls = len(message.tool_calls) > 0 - if ipython and not parsed_tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content="", - parse_status=ToolCallParseStatus.failure, - ), - stop_reason=stop_reason, - ) - ) - - for tool_call in message.tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content=tool_call, - parse_status=ToolCallParseStatus.success, - ), - stop_reason=stop_reason, - ) - ) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.complete, - delta="", - stop_reason=stop_reason, - ) - ) + def _get_params(self, request: ChatCompletionRequest) -> dict: + return { + "model": self.map_to_provider_model(request.model), + "prompt": chat_completion_request_to_prompt(request, self.formatter), + "stream": request.stream, + 
**get_sampling_options(request), + } diff --git a/llama_stack/providers/tests/inference/test_inference.py b/llama_stack/providers/tests/inference/test_inference.py index 38b9ff860..6b12a54e6 100644 --- a/llama_stack/providers/tests/inference/test_inference.py +++ b/llama_stack/providers/tests/inference/test_inference.py @@ -55,7 +55,7 @@ def get_expected_stop_reason(model: str): @pytest_asyncio.fixture( scope="session", params=[ - {"model": Llama_8B}, + # {"model": Llama_8B}, {"model": Llama_3B}, ], ids=lambda d: d["model"], @@ -112,20 +112,16 @@ def sample_tool_definition(): @pytest.mark.asyncio async def test_chat_completion_non_streaming(inference_settings, sample_messages): inference_impl = inference_settings["impl"] - response = [ - r - async for r in inference_impl.chat_completion( - messages=sample_messages, - stream=False, - **inference_settings["common_params"], - ) - ] + response = await inference_impl.chat_completion( + messages=sample_messages, + stream=False, + **inference_settings["common_params"], + ) - assert len(response) == 1 - assert isinstance(response[0], ChatCompletionResponse) - assert response[0].completion_message.role == "assistant" - assert isinstance(response[0].completion_message.content, str) - assert len(response[0].completion_message.content) > 0 + assert isinstance(response, ChatCompletionResponse) + assert response.completion_message.role == "assistant" + assert isinstance(response.completion_message.content, str) + assert len(response.completion_message.content) > 0 @pytest.mark.asyncio @@ -166,20 +162,16 @@ async def test_chat_completion_with_tool_calling( ) ] - response = [ - r - async for r in inference_impl.chat_completion( - messages=messages, - tools=[sample_tool_definition], - stream=False, - **inference_settings["common_params"], - ) - ] + response = await inference_impl.chat_completion( + messages=messages, + tools=[sample_tool_definition], + stream=False, + **inference_settings["common_params"], + ) - assert len(response) == 1 - assert isinstance(response[0], ChatCompletionResponse) + assert isinstance(response, ChatCompletionResponse) - message = response[0].completion_message + message = response.completion_message # This is not supported in most providers :/ they don't return eom_id / eot_id # stop_reason = get_expected_stop_reason(inference_settings["common_params"]["model"]) diff --git a/llama_stack/providers/utils/inference/augment_messages.py b/llama_stack/providers/utils/inference/augment_messages.py index 613a39525..a69b80d7b 100644 --- a/llama_stack/providers/utils/inference/augment_messages.py +++ b/llama_stack/providers/utils/inference/augment_messages.py @@ -3,6 +3,7 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
+from llama_models.llama3.api.chat_format import ChatFormat from termcolor import cprint from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_stack.apis.inference import * # noqa: F403 @@ -19,6 +20,14 @@ from llama_models.sku_list import resolve_model from llama_stack.providers.utils.inference import supported_inference_models +def chat_completion_request_to_prompt( + request: ChatCompletionRequest, formatter: ChatFormat +) -> str: + messages = augment_messages_for_tools(request) + model_input = formatter.encode_dialog_prompt(messages) + return formatter.tokenizer.decode(model_input.tokens) + + def augment_messages_for_tools(request: ChatCompletionRequest) -> List[Message]: """Reads chat completion request and augments the messages to handle tools. For eg. for llama_3_1, add system message with the appropriate tools or @@ -48,7 +57,6 @@ def augment_messages_for_tools(request: ChatCompletionRequest) -> List[Message]: def augment_messages_for_tools_llama_3_1( request: ChatCompletionRequest, ) -> List[Message]: - assert request.tool_choice == ToolChoice.auto, "Only `ToolChoice.auto` supported" existing_messages = request.messages diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py new file mode 100644 index 000000000..a39002976 --- /dev/null +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -0,0 +1,187 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from typing import AsyncGenerator, Optional + +from llama_models.llama3.api.chat_format import ChatFormat + +from llama_models.llama3.api.datatypes import StopReason + +from llama_stack.apis.inference import * # noqa: F403 + +from pydantic import BaseModel + + +class OpenAICompatCompletionChoiceDelta(BaseModel): + content: str + + +class OpenAICompatCompletionChoice(BaseModel): + finish_reason: Optional[str] = None + text: Optional[str] = None + delta: Optional[OpenAICompatCompletionChoiceDelta] = None + + +class OpenAICompatCompletionResponse(BaseModel): + choices: List[OpenAICompatCompletionChoice] + + +def get_sampling_options(request: ChatCompletionRequest) -> dict: + options = {} + if params := request.sampling_params: + for attr in {"temperature", "top_p", "top_k", "max_tokens"}: + if getattr(params, attr): + options[attr] = getattr(params, attr) + + if params.repetition_penalty is not None and params.repetition_penalty != 1.0: + options["repeat_penalty"] = params.repetition_penalty + + return options + + +def text_from_choice(choice) -> str: + if hasattr(choice, "delta") and choice.delta: + return choice.delta.content + + return choice.text + + +def process_chat_completion_response( + request: ChatCompletionRequest, + response: OpenAICompatCompletionResponse, + formatter: ChatFormat, +) -> ChatCompletionResponse: + choice = response.choices[0] + + stop_reason = None + if reason := choice.finish_reason: + if reason in ["stop", "eos"]: + stop_reason = StopReason.end_of_turn + elif reason == "length": + stop_reason = StopReason.out_of_tokens + + if stop_reason is None: + stop_reason = StopReason.out_of_tokens + + completion_message = formatter.decode_assistant_message_from_content( + text_from_choice(choice), stop_reason + ) + return ChatCompletionResponse( + completion_message=completion_message, + logprobs=None, + ) + + +async def process_chat_completion_stream_response( + 
request: ChatCompletionRequest, + stream: AsyncGenerator[OpenAICompatCompletionResponse, None], + formatter: ChatFormat, +) -> AsyncGenerator: + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.start, + delta="", + ) + ) + + buffer = "" + ipython = False + stop_reason = None + + async for chunk in stream: + choice = chunk.choices[0] + finish_reason = choice.finish_reason + + if finish_reason: + if stop_reason is None and finish_reason in ["stop", "eos"]: + stop_reason = StopReason.end_of_turn + elif stop_reason is None and finish_reason == "length": + stop_reason = StopReason.out_of_tokens + break + + text = text_from_choice(choice) + # check if its a tool call ( aka starts with <|python_tag|> ) + if not ipython and text.startswith("<|python_tag|>"): + ipython = True + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + content="", + parse_status=ToolCallParseStatus.started, + ), + ) + ) + buffer += text + continue + + if ipython: + if text == "<|eot_id|>": + stop_reason = StopReason.end_of_turn + text = "" + continue + elif text == "<|eom_id|>": + stop_reason = StopReason.end_of_message + text = "" + continue + + buffer += text + delta = ToolCallDelta( + content=text, + parse_status=ToolCallParseStatus.in_progress, + ) + + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=delta, + stop_reason=stop_reason, + ) + ) + else: + buffer += text + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=text, + stop_reason=stop_reason, + ) + ) + + # parse tool calls and report errors + message = formatter.decode_assistant_message_from_content(buffer, stop_reason) + parsed_tool_calls = len(message.tool_calls) > 0 + if ipython and not parsed_tool_calls: + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + content="", + parse_status=ToolCallParseStatus.failure, + ), + stop_reason=stop_reason, + ) + ) + + for tool_call in message.tool_calls: + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + content=tool_call, + parse_status=ToolCallParseStatus.success, + ), + stop_reason=stop_reason, + ) + ) + + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.complete, + delta="", + stop_reason=stop_reason, + ) + ) From ed899a5dec86a94870b3467607d677a53bb231fd Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 8 Oct 2024 12:57:34 -0700 Subject: [PATCH 57/69] Convert TGI to work with openai_compat --- .../inference/databricks/databricks.py | 213 ++++-------------- .../providers/adapters/inference/tgi/tgi.py | 213 ++++++------------ .../adapters/inference/together/together.py | 5 - .../tests/inference/test_inference.py | 4 +- .../utils/inference/augment_messages.py | 14 ++ .../utils/inference/openai_compat.py | 22 +- 6 files changed, 133 insertions(+), 338 deletions(-) diff --git a/llama_stack/providers/adapters/inference/databricks/databricks.py b/llama_stack/providers/adapters/inference/databricks/databricks.py index 6d106ccf1..f318e6180 100644 --- 
a/llama_stack/providers/adapters/inference/databricks/databricks.py +++ b/llama_stack/providers/adapters/inference/databricks/databricks.py @@ -8,16 +8,22 @@ from typing import AsyncGenerator from llama_models.llama3.api.chat_format import ChatFormat -from llama_models.llama3.api.datatypes import Message, StopReason +from llama_models.llama3.api.datatypes import Message from llama_models.llama3.api.tokenizer import Tokenizer from openai import OpenAI from llama_stack.apis.inference import * # noqa: F403 + from llama_stack.providers.utils.inference.augment_messages import ( - augment_messages_for_tools, + chat_completion_request_to_prompt, ) from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper +from llama_stack.providers.utils.inference.openai_compat import ( + get_sampling_options, + process_chat_completion_response, + process_chat_completion_stream_response, +) from .config import DatabricksImplConfig @@ -34,12 +40,7 @@ class DatabricksInferenceAdapter(ModelRegistryHelper, Inference): self, stack_to_provider_models_map=DATABRICKS_SUPPORTED_MODELS ) self.config = config - tokenizer = Tokenizer.get_instance() - self.formatter = ChatFormat(tokenizer) - - @property - def client(self) -> OpenAI: - return OpenAI(base_url=self.config.url, api_key=self.config.api_token) + self.formatter = ChatFormat(Tokenizer.get_instance()) async def initialize(self) -> None: return @@ -47,35 +48,10 @@ class DatabricksInferenceAdapter(ModelRegistryHelper, Inference): async def shutdown(self) -> None: pass - async def validate_routing_keys(self, routing_keys: list[str]) -> None: - # these are the model names the Llama Stack will use to route requests to this provider - # perform validation here if necessary - pass - - async def completion(self, request: CompletionRequest) -> AsyncGenerator: + def completion(self, request: CompletionRequest) -> AsyncGenerator: raise NotImplementedError() - def _messages_to_databricks_messages(self, messages: list[Message]) -> list: - databricks_messages = [] - for message in messages: - if message.role == "ipython": - role = "tool" - else: - role = message.role - databricks_messages.append({"role": role, "content": message.content}) - - return databricks_messages - - def get_databricks_chat_options(self, request: ChatCompletionRequest) -> dict: - options = {} - if request.sampling_params is not None: - for attr in {"temperature", "top_p", "top_k", "max_tokens"}: - if getattr(request.sampling_params, attr): - options[attr] = getattr(request.sampling_params, attr) - - return options - - async def chat_completion( + def chat_completion( self, model: str, messages: List[Message], @@ -97,146 +73,39 @@ class DatabricksInferenceAdapter(ModelRegistryHelper, Inference): logprobs=logprobs, ) - messages = augment_messages_for_tools(request) - options = self.get_databricks_chat_options(request) - databricks_model = self.map_to_provider_model(request.model) - - if not request.stream: - r = self.client.chat.completions.create( - model=databricks_model, - messages=self._messages_to_databricks_messages(messages), - stream=False, - **options, - ) - - stop_reason = None - if r.choices[0].finish_reason: - if r.choices[0].finish_reason == "stop": - stop_reason = StopReason.end_of_turn - elif r.choices[0].finish_reason == "length": - stop_reason = StopReason.out_of_tokens - - completion_message = self.formatter.decode_assistant_message_from_content( - r.choices[0].message.content, stop_reason - ) - yield ChatCompletionResponse( - completion_message=completion_message, - 
logprobs=None, - ) + client = OpenAI(base_url=self.config.url, api_key=self.config.api_token) + if stream: + return self._stream_chat_completion(request, client) else: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.start, - delta="", - ) - ) + return self._nonstream_chat_completion(request, client) - buffer = "" - ipython = False - stop_reason = None + async def _nonstream_chat_completion( + self, request: ChatCompletionRequest, client: OpenAI + ) -> ChatCompletionResponse: + params = self._get_params(request) + r = client.completions.create(**params) + return process_chat_completion_response(request, r, self.formatter) - for chunk in self.client.chat.completions.create( - model=databricks_model, - messages=self._messages_to_databricks_messages(messages), - stream=True, - **options, - ): - if chunk.choices[0].finish_reason: - if stop_reason is None and chunk.choices[0].finish_reason == "stop": - stop_reason = StopReason.end_of_turn - elif ( - stop_reason is None - and chunk.choices[0].finish_reason == "length" - ): - stop_reason = StopReason.out_of_tokens - break + async def _stream_chat_completion( + self, request: ChatCompletionRequest, client: OpenAI + ) -> AsyncGenerator: + params = self._get_params(request) - text = chunk.choices[0].delta.content + async def _to_async_generator(): + s = client.completions.create(**params) + for chunk in s: + yield chunk - if text is None: - continue + stream = _to_async_generator() + async for chunk in process_chat_completion_stream_response( + request, stream, self.formatter + ): + yield chunk - # check if its a tool call ( aka starts with <|python_tag|> ) - if not ipython and text.startswith("<|python_tag|>"): - ipython = True - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content="", - parse_status=ToolCallParseStatus.started, - ), - ) - ) - buffer += text - continue - - if ipython: - if text == "<|eot_id|>": - stop_reason = StopReason.end_of_turn - text = "" - continue - elif text == "<|eom_id|>": - stop_reason = StopReason.end_of_message - text = "" - continue - - buffer += text - delta = ToolCallDelta( - content=text, - parse_status=ToolCallParseStatus.in_progress, - ) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=delta, - stop_reason=stop_reason, - ) - ) - else: - buffer += text - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=text, - stop_reason=stop_reason, - ) - ) - - # parse tool calls and report errors - message = self.formatter.decode_assistant_message_from_content( - buffer, stop_reason - ) - parsed_tool_calls = len(message.tool_calls) > 0 - if ipython and not parsed_tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content="", - parse_status=ToolCallParseStatus.failure, - ), - stop_reason=stop_reason, - ) - ) - - for tool_call in message.tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content=tool_call, - parse_status=ToolCallParseStatus.success, - ), - stop_reason=stop_reason, - ) - ) - - yield 
ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.complete, - delta="", - stop_reason=stop_reason, - ) - ) + def _get_params(self, request: ChatCompletionRequest) -> dict: + return { + "model": self.map_to_provider_model(request.model), + "prompt": chat_completion_request_to_prompt(request, self.formatter), + "stream": request.stream, + **get_sampling_options(request), + } diff --git a/llama_stack/providers/adapters/inference/tgi/tgi.py b/llama_stack/providers/adapters/inference/tgi/tgi.py index 0ad20edd6..bd05f98bb 100644 --- a/llama_stack/providers/adapters/inference/tgi/tgi.py +++ b/llama_stack/providers/adapters/inference/tgi/tgi.py @@ -10,13 +10,19 @@ from typing import AsyncGenerator from huggingface_hub import AsyncInferenceClient, HfApi from llama_models.llama3.api.chat_format import ChatFormat -from llama_models.llama3.api.datatypes import StopReason from llama_models.llama3.api.tokenizer import Tokenizer from llama_models.sku_list import resolve_model from llama_stack.apis.inference import * # noqa: F403 from llama_stack.providers.utils.inference.augment_messages import ( - augment_messages_for_tools, + chat_completion_request_to_model_input_info, +) +from llama_stack.providers.utils.inference.openai_compat import ( + get_sampling_options, + OpenAICompatCompletionChoice, + OpenAICompatCompletionResponse, + process_chat_completion_response, + process_chat_completion_stream_response, ) from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig @@ -30,8 +36,7 @@ class _HfAdapter(Inference): model_id: str def __init__(self) -> None: - self.tokenizer = Tokenizer.get_instance() - self.formatter = ChatFormat(self.tokenizer) + self.formatter = ChatFormat(Tokenizer.get_instance()) async def register_model(self, model: ModelDef) -> None: resolved_model = resolve_model(model.identifier) @@ -49,7 +54,7 @@ class _HfAdapter(Inference): async def shutdown(self) -> None: pass - async def completion( + def completion( self, model: str, content: InterleavedTextMedia, @@ -59,16 +64,7 @@ class _HfAdapter(Inference): ) -> AsyncGenerator: raise NotImplementedError() - def get_chat_options(self, request: ChatCompletionRequest) -> dict: - options = {} - if request.sampling_params is not None: - for attr in {"temperature", "top_p", "top_k", "max_tokens"}: - if getattr(request.sampling_params, attr): - options[attr] = getattr(request.sampling_params, attr) - - return options - - async def chat_completion( + def chat_completion( self, model: str, messages: List[Message], @@ -90,145 +86,64 @@ class _HfAdapter(Inference): logprobs=logprobs, ) - messages = augment_messages_for_tools(request) - model_input = self.formatter.encode_dialog_prompt(messages) - prompt = self.tokenizer.decode(model_input.tokens) + if stream: + return self._stream_chat_completion(request) + else: + return self._nonstream_chat_completion(request) - input_tokens = len(model_input.tokens) + async def _nonstream_chat_completion( + self, request: ChatCompletionRequest + ) -> ChatCompletionResponse: + params = self._get_params(request) + r = await self.client.text_generation(**params) + + choice = OpenAICompatCompletionChoice( + finish_reason=r.details.finish_reason, + text="".join(t.text for t in r.details.tokens), + ) + response = OpenAICompatCompletionResponse( + choices=[choice], + ) + return process_chat_completion_response(request, response, self.formatter) + + async def _stream_chat_completion( + self, request: ChatCompletionRequest + ) -> 
AsyncGenerator: + params = self._get_params(request) + + async def _generate_and_convert_to_openai_compat(): + s = await self.client.text_generation(**params) + async for chunk in s: + token_result = chunk.token + + choice = OpenAICompatCompletionChoice(text=token_result.text) + yield OpenAICompatCompletionResponse( + choices=[choice], + ) + + stream = _generate_and_convert_to_openai_compat() + async for chunk in process_chat_completion_stream_response( + request, stream, self.formatter + ): + yield chunk + + def _get_params(self, request: ChatCompletionRequest) -> dict: + prompt, input_tokens = chat_completion_request_to_model_input_info( + request, self.formatter + ) max_new_tokens = min( request.sampling_params.max_tokens or (self.max_tokens - input_tokens), self.max_tokens - input_tokens - 1, ) - - options = self.get_chat_options(request) - if not request.stream: - response = await self.client.text_generation( - prompt=prompt, - stream=False, - details=True, - max_new_tokens=max_new_tokens, - stop_sequences=["<|eom_id|>", "<|eot_id|>"], - **options, - ) - stop_reason = None - if response.details.finish_reason: - if response.details.finish_reason in ["stop", "eos_token"]: - stop_reason = StopReason.end_of_turn - elif response.details.finish_reason == "length": - stop_reason = StopReason.out_of_tokens - - generated_text = "".join(t.text for t in response.details.tokens) - completion_message = self.formatter.decode_assistant_message_from_content( - generated_text, - stop_reason, - ) - yield ChatCompletionResponse( - completion_message=completion_message, - logprobs=None, - ) - - else: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.start, - delta="", - ) - ) - buffer = "" - ipython = False - stop_reason = None - tokens = [] - - async for response in await self.client.text_generation( - prompt=prompt, - stream=True, - details=True, - max_new_tokens=max_new_tokens, - stop_sequences=["<|eom_id|>", "<|eot_id|>"], - **options, - ): - token_result = response.token - - buffer += token_result.text - tokens.append(token_result.id) - - if not ipython and buffer.startswith("<|python_tag|>"): - ipython = True - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content="", - parse_status=ToolCallParseStatus.started, - ), - ) - ) - buffer = buffer[len("<|python_tag|>") :] - continue - - if token_result.text == "<|eot_id|>": - stop_reason = StopReason.end_of_turn - text = "" - elif token_result.text == "<|eom_id|>": - stop_reason = StopReason.end_of_message - text = "" - else: - text = token_result.text - - if ipython: - delta = ToolCallDelta( - content=text, - parse_status=ToolCallParseStatus.in_progress, - ) - else: - delta = text - - if stop_reason is None: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=delta, - stop_reason=stop_reason, - ) - ) - - if stop_reason is None: - stop_reason = StopReason.out_of_tokens - - # parse tool calls and report errors - message = self.formatter.decode_assistant_message(tokens, stop_reason) - parsed_tool_calls = len(message.tool_calls) > 0 - if ipython and not parsed_tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content="", - parse_status=ToolCallParseStatus.failure, 
- ), - stop_reason=stop_reason, - ) - ) - - for tool_call in message.tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content=tool_call, - parse_status=ToolCallParseStatus.success, - ), - stop_reason=stop_reason, - ) - ) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.complete, - delta="", - stop_reason=stop_reason, - ) - ) + options = get_sampling_options(request) + return dict( + prompt=prompt, + stream=request.stream, + details=True, + max_new_tokens=max_new_tokens, + stop_sequences=["<|eom_id|>", "<|eot_id|>"], + **options, + ) class TGIAdapter(_HfAdapter): diff --git a/llama_stack/providers/adapters/inference/together/together.py b/llama_stack/providers/adapters/inference/together/together.py index d9a9ae491..adea696fb 100644 --- a/llama_stack/providers/adapters/inference/together/together.py +++ b/llama_stack/providers/adapters/inference/together/together.py @@ -48,10 +48,6 @@ class TogetherInferenceAdapter( self.config = config self.formatter = ChatFormat(Tokenizer.get_instance()) - @property - def client(self) -> Together: - return Together(api_key=self.config.api_key) - async def initialize(self) -> None: return @@ -91,7 +87,6 @@ class TogetherInferenceAdapter( together_api_key = provider_data.together_api_key client = Together(api_key=together_api_key) - # wrapper request to make it easier to pass around (internal only, not exposed to API) request = ChatCompletionRequest( model=model, messages=messages, diff --git a/llama_stack/providers/tests/inference/test_inference.py b/llama_stack/providers/tests/inference/test_inference.py index 6b12a54e6..107a534d5 100644 --- a/llama_stack/providers/tests/inference/test_inference.py +++ b/llama_stack/providers/tests/inference/test_inference.py @@ -55,8 +55,8 @@ def get_expected_stop_reason(model: str): @pytest_asyncio.fixture( scope="session", params=[ - # {"model": Llama_8B}, - {"model": Llama_3B}, + {"model": Llama_8B}, + # {"model": Llama_3B}, ], ids=lambda d: d["model"], ) diff --git a/llama_stack/providers/utils/inference/augment_messages.py b/llama_stack/providers/utils/inference/augment_messages.py index a69b80d7b..8f59b5295 100644 --- a/llama_stack/providers/utils/inference/augment_messages.py +++ b/llama_stack/providers/utils/inference/augment_messages.py @@ -3,8 +3,11 @@ # # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +from typing import Tuple + from llama_models.llama3.api.chat_format import ChatFormat from termcolor import cprint + from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_stack.apis.inference import * # noqa: F403 from llama_models.datatypes import ModelFamily @@ -28,6 +31,17 @@ def chat_completion_request_to_prompt( return formatter.tokenizer.decode(model_input.tokens) +def chat_completion_request_to_model_input_info( + request: ChatCompletionRequest, formatter: ChatFormat +) -> Tuple[str, int]: + messages = augment_messages_for_tools(request) + model_input = formatter.encode_dialog_prompt(messages) + return ( + formatter.tokenizer.decode(model_input.tokens), + len(model_input.tokens), + ) + + def augment_messages_for_tools(request: ChatCompletionRequest) -> List[Message]: """Reads chat completion request and augments the messages to handle tools. For eg. 
for llama_3_1, add system message with the appropriate tools or diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index a39002976..118880b29 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -60,6 +60,8 @@ def process_chat_completion_response( if reason := choice.finish_reason: if reason in ["stop", "eos"]: stop_reason = StopReason.end_of_turn + elif reason == "eom": + stop_reason = StopReason.end_of_message elif reason == "length": stop_reason = StopReason.out_of_tokens @@ -96,7 +98,7 @@ async def process_chat_completion_stream_response( finish_reason = choice.finish_reason if finish_reason: - if stop_reason is None and finish_reason in ["stop", "eos"]: + if stop_reason is None and finish_reason in ["stop", "eos", "eos_token"]: stop_reason = StopReason.end_of_turn elif stop_reason is None and finish_reason == "length": stop_reason = StopReason.out_of_tokens @@ -118,16 +120,16 @@ async def process_chat_completion_stream_response( buffer += text continue - if ipython: - if text == "<|eot_id|>": - stop_reason = StopReason.end_of_turn - text = "" - continue - elif text == "<|eom_id|>": - stop_reason = StopReason.end_of_message - text = "" - continue + if text == "<|eot_id|>": + stop_reason = StopReason.end_of_turn + text = "" + continue + elif text == "<|eom_id|>": + stop_reason = StopReason.end_of_message + text = "" + continue + if ipython: buffer += text delta = ToolCallDelta( content=text, From 336cf7a6747be37ec285f35ec1f41c387a913658 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 8 Oct 2024 13:38:32 -0700 Subject: [PATCH 58/69] update vllm; not quite tested yet --- llama_stack/providers/impls/vllm/vllm.py | 224 ++++++----------------- 1 file changed, 55 insertions(+), 169 deletions(-) diff --git a/llama_stack/providers/impls/vllm/vllm.py b/llama_stack/providers/impls/vllm/vllm.py index 0f8e8d38c..748871b4e 100644 --- a/llama_stack/providers/impls/vllm/vllm.py +++ b/llama_stack/providers/impls/vllm/vllm.py @@ -10,39 +10,26 @@ import uuid from typing import Any from llama_models.llama3.api.chat_format import ChatFormat -from llama_models.llama3.api.datatypes import ( - CompletionMessage, - InterleavedTextMedia, - Message, - StopReason, - ToolChoice, - ToolDefinition, - ToolPromptFormat, -) +from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_models.llama3.api.tokenizer import Tokenizer from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.sampling_params import SamplingParams -from llama_stack.apis.inference import ChatCompletionRequest, Inference +from llama_stack.apis.inference import * # noqa: F403 -from llama_stack.apis.inference.inference import ( - ChatCompletionResponse, - ChatCompletionResponseEvent, - ChatCompletionResponseEventType, - ChatCompletionResponseStreamChunk, - CompletionResponse, - CompletionResponseStreamChunk, - EmbeddingsResponse, - LogProbConfig, - ToolCallDelta, - ToolCallParseStatus, -) from llama_stack.providers.utils.inference.augment_messages import ( - augment_messages_for_tools, + chat_completion_request_to_prompt, ) + from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper +from llama_stack.providers.utils.inference.openai_compat import ( + OpenAICompatCompletionChoice, + OpenAICompatCompletionResponse, + process_chat_completion_response, + process_chat_completion_stream_response, +) 
from .config import VLLMConfig @@ -72,10 +59,10 @@ def _vllm_sampling_params(sampling_params: Any) -> SamplingParams: if sampling_params.repetition_penalty > 0: kwargs["repetition_penalty"] = sampling_params.repetition_penalty - return SamplingParams().from_optional(**kwargs) + return SamplingParams(**kwargs) -class VLLMInferenceImpl(Inference, ModelRegistryHelper): +class VLLMInferenceImpl(ModelRegistryHelper, Inference): """Inference implementation for vLLM.""" HF_MODEL_MAPPINGS = { @@ -148,7 +135,7 @@ class VLLMInferenceImpl(Inference, ModelRegistryHelper): if self.engine: self.engine.shutdown_background_loop() - async def completion( + def completion( self, model: str, content: InterleavedTextMedia, @@ -157,17 +144,16 @@ class VLLMInferenceImpl(Inference, ModelRegistryHelper): logprobs: LogProbConfig | None = None, ) -> CompletionResponse | CompletionResponseStreamChunk: log.info("vLLM completion") - messages = [Message(role="user", content=content)] - async for result in self.chat_completion( + messages = [UserMessage(content=content)] + return self.chat_completion( model=model, messages=messages, sampling_params=sampling_params, stream=stream, logprobs=logprobs, - ): - yield result + ) - async def chat_completion( + def chat_completion( self, model: str, messages: list[Message], @@ -194,159 +180,59 @@ class VLLMInferenceImpl(Inference, ModelRegistryHelper): ) log.info("Sampling params: %s", sampling_params) - vllm_sampling_params = _vllm_sampling_params(sampling_params) - - messages = augment_messages_for_tools(request) - log.info("Augmented messages: %s", messages) - prompt = "".join([str(message.content) for message in messages]) - request_id = _random_uuid() + + prompt = chat_completion_request_to_prompt(request, self.formatter) + vllm_sampling_params = _vllm_sampling_params(request.sampling_params) results_generator = self.engine.generate( prompt, vllm_sampling_params, request_id ) - - if not stream: - # Non-streaming case - final_output = None - stop_reason = None - async for request_output in results_generator: - final_output = request_output - if stop_reason is None and request_output.outputs: - reason = request_output.outputs[-1].stop_reason - if reason == "stop": - stop_reason = StopReason.end_of_turn - elif reason == "length": - stop_reason = StopReason.out_of_tokens - - if not stop_reason: - stop_reason = StopReason.end_of_message - - if final_output: - response = "".join([output.text for output in final_output.outputs]) - yield ChatCompletionResponse( - completion_message=CompletionMessage( - content=response, - stop_reason=stop_reason, - ), - logprobs=None, - ) + if stream: + return self._stream_chat_completion(request, results_generator) else: - # Streaming case - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.start, - delta="", - ) - ) + return self._nonstream_chat_completion(request, results_generator) - buffer = "" - last_chunk = "" - ipython = False - stop_reason = None + async def _nonstream_chat_completion( + self, request: ChatCompletionRequest, results_generator: AsyncGenerator + ) -> ChatCompletionResponse: + outputs = [o async for o in results_generator] + final_output = outputs[-1] + assert final_output is not None + outputs = final_output.outputs + finish_reason = outputs[-1].stop_reason + choice = OpenAICompatCompletionChoice( + finish_reason=finish_reason, + text="".join([output.text for output in outputs]), + ) + response = OpenAICompatCompletionResponse( + choices=[choice], + ) + 
return process_chat_completion_response(request, response, self.formatter) + + async def _stream_chat_completion( + self, request: ChatCompletionRequest, results_generator: AsyncGenerator + ) -> AsyncGenerator: + async def _generate_and_convert_to_openai_compat(): async for chunk in results_generator: if not chunk.outputs: log.warning("Empty chunk received") continue - if chunk.outputs[-1].stop_reason: - reason = chunk.outputs[-1].stop_reason - if stop_reason is None and reason == "stop": - stop_reason = StopReason.end_of_turn - elif stop_reason is None and reason == "length": - stop_reason = StopReason.out_of_tokens - break - text = "".join([output.text for output in chunk.outputs]) - - # check if its a tool call ( aka starts with <|python_tag|> ) - if not ipython and text.startswith("<|python_tag|>"): - ipython = True - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content="", - parse_status=ToolCallParseStatus.started, - ), - ) - ) - buffer += text - continue - - if ipython: - if text == "<|eot_id|>": - stop_reason = StopReason.end_of_turn - text = "" - continue - elif text == "<|eom_id|>": - stop_reason = StopReason.end_of_message - text = "" - continue - - buffer += text - delta = ToolCallDelta( - content=text, - parse_status=ToolCallParseStatus.in_progress, - ) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=delta, - stop_reason=stop_reason, - ) - ) - else: - last_chunk_len = len(last_chunk) - last_chunk = text - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=text[last_chunk_len:], - stop_reason=stop_reason, - ) - ) - - if not stop_reason: - stop_reason = StopReason.end_of_message - - # parse tool calls and report errors - message = self.formatter.decode_assistant_message_from_content( - buffer, stop_reason - ) - parsed_tool_calls = len(message.tool_calls) > 0 - if ipython and not parsed_tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content="", - parse_status=ToolCallParseStatus.failure, - ), - stop_reason=stop_reason, - ) + choice = OpenAICompatCompletionChoice( + finish_reason=chunk.outputs[-1].stop_reason, + text=text, + ) + yield OpenAICompatCompletionResponse( + choices=[choice], ) - for tool_call in message.tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content=tool_call, - parse_status=ToolCallParseStatus.success, - ), - stop_reason=stop_reason, - ) - ) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.complete, - delta="", - stop_reason=stop_reason, - ) - ) + stream = _generate_and_convert_to_openai_compat() + async for chunk in process_chat_completion_stream_response( + request, stream, self.formatter + ): + yield chunk async def embeddings( self, model: str, contents: list[InterleavedTextMedia] From 640c5c54f7761f365a5358a4998b2ecebc872651 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 8 Oct 2024 13:48:44 -0700 Subject: [PATCH 59/69] rename augment_messages --- .../adapters/inference/databricks/databricks.py | 6 +++--- 
.../adapters/inference/fireworks/fireworks.py | 6 +++--- .../providers/adapters/inference/ollama/ollama.py | 6 +++--- llama_stack/providers/adapters/inference/tgi/tgi.py | 6 +++--- .../adapters/inference/together/together.py | 6 +++--- .../impls/meta_reference/inference/inference.py | 8 ++++---- llama_stack/providers/impls/vllm/vllm.py | 7 +++---- .../providers/tests/inference/test_inference.py | 2 +- .../providers/tests/inference/test_prompt_adapter.py | 12 ++++++------ .../{augment_messages.py => prompt_adapter.py} | 8 +++++--- 10 files changed, 34 insertions(+), 33 deletions(-) rename tests/test_augment_messages.py => llama_stack/providers/tests/inference/test_prompt_adapter.py (91%) rename llama_stack/providers/utils/inference/{augment_messages.py => prompt_adapter.py} (96%) diff --git a/llama_stack/providers/adapters/inference/databricks/databricks.py b/llama_stack/providers/adapters/inference/databricks/databricks.py index f318e6180..847c85eba 100644 --- a/llama_stack/providers/adapters/inference/databricks/databricks.py +++ b/llama_stack/providers/adapters/inference/databricks/databricks.py @@ -15,15 +15,15 @@ from openai import OpenAI from llama_stack.apis.inference import * # noqa: F403 -from llama_stack.providers.utils.inference.augment_messages import ( - chat_completion_request_to_prompt, -) from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from llama_stack.providers.utils.inference.openai_compat import ( get_sampling_options, process_chat_completion_response, process_chat_completion_stream_response, ) +from llama_stack.providers.utils.inference.prompt_adapter import ( + chat_completion_request_to_prompt, +) from .config import DatabricksImplConfig diff --git a/llama_stack/providers/adapters/inference/fireworks/fireworks.py b/llama_stack/providers/adapters/inference/fireworks/fireworks.py index ce57480a0..c0edc836a 100644 --- a/llama_stack/providers/adapters/inference/fireworks/fireworks.py +++ b/llama_stack/providers/adapters/inference/fireworks/fireworks.py @@ -15,15 +15,15 @@ from llama_models.llama3.api.tokenizer import Tokenizer from llama_stack.apis.inference import * # noqa: F403 -from llama_stack.providers.utils.inference.augment_messages import ( - chat_completion_request_to_prompt, -) from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from llama_stack.providers.utils.inference.openai_compat import ( get_sampling_options, process_chat_completion_response, process_chat_completion_stream_response, ) +from llama_stack.providers.utils.inference.prompt_adapter import ( + chat_completion_request_to_prompt, +) from .config import FireworksImplConfig diff --git a/llama_stack/providers/adapters/inference/ollama/ollama.py b/llama_stack/providers/adapters/inference/ollama/ollama.py index 86d72ca7f..fe5e39c30 100644 --- a/llama_stack/providers/adapters/inference/ollama/ollama.py +++ b/llama_stack/providers/adapters/inference/ollama/ollama.py @@ -15,9 +15,6 @@ from llama_models.llama3.api.tokenizer import Tokenizer from ollama import AsyncClient from llama_stack.apis.inference import * # noqa: F403 -from llama_stack.providers.utils.inference.augment_messages import ( - chat_completion_request_to_prompt, -) from llama_stack.providers.utils.inference.openai_compat import ( get_sampling_options, OpenAICompatCompletionChoice, @@ -25,6 +22,9 @@ from llama_stack.providers.utils.inference.openai_compat import ( process_chat_completion_response, process_chat_completion_stream_response, ) +from 
llama_stack.providers.utils.inference.prompt_adapter import ( + chat_completion_request_to_prompt, +) OLLAMA_SUPPORTED_MODELS = { "Llama3.1-8B-Instruct": "llama3.1:8b-instruct-fp16", diff --git a/llama_stack/providers/adapters/inference/tgi/tgi.py b/llama_stack/providers/adapters/inference/tgi/tgi.py index bd05f98bb..59eb7f3f1 100644 --- a/llama_stack/providers/adapters/inference/tgi/tgi.py +++ b/llama_stack/providers/adapters/inference/tgi/tgi.py @@ -14,9 +14,6 @@ from llama_models.llama3.api.tokenizer import Tokenizer from llama_models.sku_list import resolve_model from llama_stack.apis.inference import * # noqa: F403 -from llama_stack.providers.utils.inference.augment_messages import ( - chat_completion_request_to_model_input_info, -) from llama_stack.providers.utils.inference.openai_compat import ( get_sampling_options, OpenAICompatCompletionChoice, @@ -24,6 +21,9 @@ from llama_stack.providers.utils.inference.openai_compat import ( process_chat_completion_response, process_chat_completion_stream_response, ) +from llama_stack.providers.utils.inference.prompt_adapter import ( + chat_completion_request_to_model_input_info, +) from .config import InferenceAPIImplConfig, InferenceEndpointImplConfig, TGIImplConfig diff --git a/llama_stack/providers/adapters/inference/together/together.py b/llama_stack/providers/adapters/inference/together/together.py index adea696fb..0ef5bc593 100644 --- a/llama_stack/providers/adapters/inference/together/together.py +++ b/llama_stack/providers/adapters/inference/together/together.py @@ -15,15 +15,15 @@ from together import Together from llama_stack.apis.inference import * # noqa: F403 from llama_stack.distribution.request_headers import NeedsRequestProviderData -from llama_stack.providers.utils.inference.augment_messages import ( - chat_completion_request_to_prompt, -) from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from llama_stack.providers.utils.inference.openai_compat import ( get_sampling_options, process_chat_completion_response, process_chat_completion_stream_response, ) +from llama_stack.providers.utils.inference.prompt_adapter import ( + chat_completion_request_to_prompt, +) from .config import TogetherImplConfig diff --git a/llama_stack/providers/impls/meta_reference/inference/inference.py b/llama_stack/providers/impls/meta_reference/inference/inference.py index ad8cc31fd..9e31f0834 100644 --- a/llama_stack/providers/impls/meta_reference/inference/inference.py +++ b/llama_stack/providers/impls/meta_reference/inference/inference.py @@ -12,8 +12,8 @@ from llama_models.sku_list import resolve_model from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_stack.apis.inference import * # noqa: F403 -from llama_stack.providers.utils.inference.augment_messages import ( - augment_messages_for_tools, +from llama_stack.providers.utils.inference.prompt_adapter import ( + chat_completion_request_to_messages, ) from .config import MetaReferenceImplConfig @@ -94,7 +94,7 @@ class MetaReferenceInferenceImpl(Inference): async def _nonstream_chat_completion( self, request: ChatCompletionRequest ) -> ChatCompletionResponse: - messages = augment_messages_for_tools(request) + messages = chat_completion_request_to_messages(request) tokens = [] logprobs = [] @@ -136,7 +136,7 @@ class MetaReferenceInferenceImpl(Inference): async def _stream_chat_completion( self, request: ChatCompletionRequest ) -> AsyncGenerator: - messages = augment_messages_for_tools(request) + messages = chat_completion_request_to_messages(request) 
yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( diff --git a/llama_stack/providers/impls/vllm/vllm.py b/llama_stack/providers/impls/vllm/vllm.py index 748871b4e..e0b063ac9 100644 --- a/llama_stack/providers/impls/vllm/vllm.py +++ b/llama_stack/providers/impls/vllm/vllm.py @@ -19,10 +19,6 @@ from vllm.sampling_params import SamplingParams from llama_stack.apis.inference import * # noqa: F403 -from llama_stack.providers.utils.inference.augment_messages import ( - chat_completion_request_to_prompt, -) - from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper from llama_stack.providers.utils.inference.openai_compat import ( OpenAICompatCompletionChoice, @@ -30,6 +26,9 @@ from llama_stack.providers.utils.inference.openai_compat import ( process_chat_completion_response, process_chat_completion_stream_response, ) +from llama_stack.providers.utils.inference.prompt_adapter import ( + chat_completion_request_to_prompt, +) from .config import VLLMConfig diff --git a/llama_stack/providers/tests/inference/test_inference.py b/llama_stack/providers/tests/inference/test_inference.py index 107a534d5..b864c2ef4 100644 --- a/llama_stack/providers/tests/inference/test_inference.py +++ b/llama_stack/providers/tests/inference/test_inference.py @@ -56,7 +56,7 @@ def get_expected_stop_reason(model: str): scope="session", params=[ {"model": Llama_8B}, - # {"model": Llama_3B}, + {"model": Llama_3B}, ], ids=lambda d: d["model"], ) diff --git a/tests/test_augment_messages.py b/llama_stack/providers/tests/inference/test_prompt_adapter.py similarity index 91% rename from tests/test_augment_messages.py rename to llama_stack/providers/tests/inference/test_prompt_adapter.py index 1c2eb62b4..3a1e25d65 100644 --- a/tests/test_augment_messages.py +++ b/llama_stack/providers/tests/inference/test_prompt_adapter.py @@ -8,7 +8,7 @@ import unittest from llama_models.llama3.api import * # noqa: F403 from llama_stack.inference.api import * # noqa: F403 -from llama_stack.inference.augment_messages import augment_messages_for_tools +from llama_stack.inference.prompt_adapter import chat_completion_request_to_messages MODEL = "Llama3.1-8B-Instruct" @@ -22,7 +22,7 @@ class PrepareMessagesTests(unittest.IsolatedAsyncioTestCase): UserMessage(content=content), ], ) - messages = augment_messages_for_tools(request) + messages = chat_completion_request_to_messages(request) self.assertEqual(len(messages), 2) self.assertEqual(messages[-1].content, content) self.assertTrue("Cutting Knowledge Date: December 2023" in messages[0].content) @@ -39,7 +39,7 @@ class PrepareMessagesTests(unittest.IsolatedAsyncioTestCase): ToolDefinition(tool_name=BuiltinTool.brave_search), ], ) - messages = augment_messages_for_tools(request) + messages = chat_completion_request_to_messages(request) self.assertEqual(len(messages), 2) self.assertEqual(messages[-1].content, content) self.assertTrue("Cutting Knowledge Date: December 2023" in messages[0].content) @@ -67,7 +67,7 @@ class PrepareMessagesTests(unittest.IsolatedAsyncioTestCase): ], tool_prompt_format=ToolPromptFormat.json, ) - messages = augment_messages_for_tools(request) + messages = chat_completion_request_to_messages(request) self.assertEqual(len(messages), 3) self.assertTrue("Environment: ipython" in messages[0].content) @@ -97,7 +97,7 @@ class PrepareMessagesTests(unittest.IsolatedAsyncioTestCase): ), ], ) - messages = augment_messages_for_tools(request) + messages = chat_completion_request_to_messages(request) self.assertEqual(len(messages), 3) 
self.assertTrue("Environment: ipython" in messages[0].content) @@ -119,7 +119,7 @@ class PrepareMessagesTests(unittest.IsolatedAsyncioTestCase): ToolDefinition(tool_name=BuiltinTool.code_interpreter), ], ) - messages = augment_messages_for_tools(request) + messages = chat_completion_request_to_messages(request) self.assertEqual(len(messages), 2, messages) self.assertTrue(messages[0].content.endswith(system_prompt)) diff --git a/llama_stack/providers/utils/inference/augment_messages.py b/llama_stack/providers/utils/inference/prompt_adapter.py similarity index 96% rename from llama_stack/providers/utils/inference/augment_messages.py rename to llama_stack/providers/utils/inference/prompt_adapter.py index 8f59b5295..5b8ded52c 100644 --- a/llama_stack/providers/utils/inference/augment_messages.py +++ b/llama_stack/providers/utils/inference/prompt_adapter.py @@ -26,7 +26,7 @@ from llama_stack.providers.utils.inference import supported_inference_models def chat_completion_request_to_prompt( request: ChatCompletionRequest, formatter: ChatFormat ) -> str: - messages = augment_messages_for_tools(request) + messages = chat_completion_request_to_messages(request) model_input = formatter.encode_dialog_prompt(messages) return formatter.tokenizer.decode(model_input.tokens) @@ -34,7 +34,7 @@ def chat_completion_request_to_prompt( def chat_completion_request_to_model_input_info( request: ChatCompletionRequest, formatter: ChatFormat ) -> Tuple[str, int]: - messages = augment_messages_for_tools(request) + messages = chat_completion_request_to_messages(request) model_input = formatter.encode_dialog_prompt(messages) return ( formatter.tokenizer.decode(model_input.tokens), @@ -42,7 +42,9 @@ def chat_completion_request_to_model_input_info( ) -def augment_messages_for_tools(request: ChatCompletionRequest) -> List[Message]: +def chat_completion_request_to_messages( + request: ChatCompletionRequest, +) -> List[Message]: """Reads chat completion request and augments the messages to handle tools. For eg. for llama_3_1, add system message with the appropriate tools or add user messsage for custom tools, etc. From 7f1160296c4ee2a80050af334caf6b489f37cbd9 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 8 Oct 2024 14:28:50 -0700 Subject: [PATCH 60/69] Updates to server.py to clean up streaming vs non-streaming stuff Also make sure agent turn create is correctly marked --- llama_stack/apis/agents/agents.py | 4 +- llama_stack/apis/agents/client.py | 31 +++-- llama_stack/apis/memory_banks/memory_banks.py | 2 +- llama_stack/apis/models/models.py | 4 +- llama_stack/apis/safety/client.py | 6 - llama_stack/apis/shields/shields.py | 6 +- llama_stack/distribution/resolver.py | 16 ++- .../distribution/routers/routing_tables.py | 5 +- llama_stack/distribution/server/server.py | 130 ++++++------------ llama_stack/providers/datatypes.py | 3 + .../meta_reference/agents/agent_instance.py | 15 +- .../impls/meta_reference/agents/agents.py | 16 ++- .../providers/tests/agents/__init__.py | 5 + 13 files changed, 115 insertions(+), 128 deletions(-) create mode 100644 llama_stack/providers/tests/agents/__init__.py diff --git a/llama_stack/apis/agents/agents.py b/llama_stack/apis/agents/agents.py index f9ad44efc..6efe1b229 100644 --- a/llama_stack/apis/agents/agents.py +++ b/llama_stack/apis/agents/agents.py @@ -411,8 +411,10 @@ class Agents(Protocol): agent_config: AgentConfig, ) -> AgentCreateResponse: ... 
+ # This method is not `async def` because it can result in either an + # `AsyncGenerator` or a `AgentTurnCreateResponse` depending on the value of `stream`. @webmethod(route="/agents/turn/create") - async def create_agent_turn( + def create_agent_turn( self, agent_id: str, session_id: str, diff --git a/llama_stack/apis/agents/client.py b/llama_stack/apis/agents/client.py index 27ebde57a..32bc9abdd 100644 --- a/llama_stack/apis/agents/client.py +++ b/llama_stack/apis/agents/client.py @@ -7,7 +7,7 @@ import asyncio import json import os -from typing import AsyncGenerator +from typing import AsyncGenerator, Optional import fire import httpx @@ -67,9 +67,17 @@ class AgentsClient(Agents): response.raise_for_status() return AgentSessionCreateResponse(**response.json()) - async def create_agent_turn( + def create_agent_turn( self, request: AgentTurnCreateRequest, + ) -> AsyncGenerator: + if request.stream: + return self._stream_agent_turn(request) + else: + return self._nonstream_agent_turn(request) + + async def _stream_agent_turn( + self, request: AgentTurnCreateRequest ) -> AsyncGenerator: async with httpx.AsyncClient() as client: async with client.stream( @@ -93,6 +101,9 @@ class AgentsClient(Agents): print(data) print(f"Error with parsing or validation: {e}") + async def _nonstream_agent_turn(self, request: AgentTurnCreateRequest): + raise NotImplementedError("Non-streaming not implemented yet") + async def _run_agent( api, model, tool_definitions, tool_prompt_format, user_prompts, attachments=None @@ -132,8 +143,7 @@ async def _run_agent( log.print() -async def run_llama_3_1(host: str, port: int): - model = "Llama3.1-8B-Instruct" +async def run_llama_3_1(host: str, port: int, model: str = "Llama3.1-8B-Instruct"): api = AgentsClient(f"http://{host}:{port}") tool_definitions = [ @@ -173,8 +183,7 @@ async def run_llama_3_1(host: str, port: int): await _run_agent(api, model, tool_definitions, ToolPromptFormat.json, user_prompts) -async def run_llama_3_2_rag(host: str, port: int): - model = "Llama3.2-3B-Instruct" +async def run_llama_3_2_rag(host: str, port: int, model: str = "Llama3.2-3B-Instruct"): api = AgentsClient(f"http://{host}:{port}") urls = [ @@ -215,8 +224,7 @@ async def run_llama_3_2_rag(host: str, port: int): ) -async def run_llama_3_2(host: str, port: int): - model = "Llama3.2-3B-Instruct" +async def run_llama_3_2(host: str, port: int, model: str = "Llama3.2-3B-Instruct"): api = AgentsClient(f"http://{host}:{port}") # zero shot tools for llama3.2 text models @@ -262,7 +270,7 @@ async def run_llama_3_2(host: str, port: int): ) -def main(host: str, port: int, run_type: str): +def main(host: str, port: int, run_type: str, model: Optional[str] = None): assert run_type in [ "tools_llama_3_1", "tools_llama_3_2", @@ -274,7 +282,10 @@ def main(host: str, port: int, run_type: str): "tools_llama_3_2": run_llama_3_2, "rag_llama_3_2": run_llama_3_2_rag, } - asyncio.run(fn[run_type](host, port)) + args = [host, port] + if model is not None: + args.append(model) + asyncio.run(fn[run_type](*args)) if __name__ == "__main__": diff --git a/llama_stack/apis/memory_banks/memory_banks.py b/llama_stack/apis/memory_banks/memory_banks.py index d54c3868d..6d9f2f9f6 100644 --- a/llama_stack/apis/memory_banks/memory_banks.py +++ b/llama_stack/apis/memory_banks/memory_banks.py @@ -22,7 +22,7 @@ class MemoryBankType(Enum): class CommonDef(BaseModel): identifier: str - provider_id: str + provider_id: Optional[str] = None @json_schema_type diff --git a/llama_stack/apis/models/models.py 
b/llama_stack/apis/models/models.py index 21dd17ca2..3a770af25 100644 --- a/llama_stack/apis/models/models.py +++ b/llama_stack/apis/models/models.py @@ -18,8 +18,8 @@ class ModelDef(BaseModel): llama_model: str = Field( description="Pointer to the core Llama family model", ) - provider_id: str = Field( - description="The provider instance which serves this model" + provider_id: Optional[str] = Field( + default=None, description="The provider instance which serves this model" ) # For now, we are only supporting core llama models but as soon as finetuned # and other custom models (for example various quantizations) are allowed, there diff --git a/llama_stack/apis/safety/client.py b/llama_stack/apis/safety/client.py index e601e6dba..35843e206 100644 --- a/llama_stack/apis/safety/client.py +++ b/llama_stack/apis/safety/client.py @@ -96,12 +96,6 @@ async def run_main(host: str, port: int, image_path: str = None): ) print(response) - response = await client.run_shield( - shield_type="injection_shield", - messages=[message], - ) - print(response) - def main(host: str, port: int, image: str = None): asyncio.run(run_main(host, port, image)) diff --git a/llama_stack/apis/shields/shields.py b/llama_stack/apis/shields/shields.py index db507a383..cec82516e 100644 --- a/llama_stack/apis/shields/shields.py +++ b/llama_stack/apis/shields/shields.py @@ -23,12 +23,12 @@ class ShieldDef(BaseModel): identifier: str = Field( description="A unique identifier for the shield type", ) - provider_id: str = Field( - description="The provider instance which serves this shield" - ) type: str = Field( description="The type of shield this is; the value is one of the ShieldType enum" ) + provider_id: Optional[str] = Field( + default=None, description="The provider instance which serves this shield" + ) params: Dict[str, Any] = Field( default_factory=dict, description="Any additional parameters needed for this shield", diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index 4db72d29e..857eef757 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -50,8 +50,10 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An f"Provider `{provider.provider_type}` is not available for API `{api}`" ) + p = all_api_providers[api][provider.provider_type] + p.deps__ = [a.value for a in p.api_dependencies] spec = ProviderWithSpec( - spec=all_api_providers[api][provider.provider_type], + spec=p, **(provider.dict()), ) specs[provider.provider_id] = spec @@ -93,6 +95,10 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An registry=registry, module="llama_stack.distribution.routers", api_dependencies=inner_deps, + deps__=( + [x.value for x in inner_deps] + + [f"inner-{info.router_api.value}"] + ), ), ) } @@ -107,6 +113,7 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An module="llama_stack.distribution.routers", routing_table_api=info.routing_table_api, api_dependencies=[info.routing_table_api], + deps__=([info.routing_table_api.value]), ), ) } @@ -130,6 +137,7 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An config_class="llama_stack.distribution.inspect.DistributionInspectConfig", module="llama_stack.distribution.inspect", api_dependencies=apis, + deps__=([x.value for x in apis]), ), ), ) @@ -175,10 +183,8 @@ def topological_sort( deps = [] for provider in providers: - for dep in provider.spec.api_dependencies: - 
deps.append(dep.value) - if isinstance(provider, AutoRoutedProviderSpec): - deps.append(f"inner-{provider.api}") + for dep in provider.spec.deps__: + deps.append(dep) for dep in deps: if dep not in visited: diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index 73e26dd2e..7cb6e8432 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -39,6 +39,7 @@ class CommonRoutingTableImpl(RoutingTable): ) -> None: for obj in registry: if obj.provider_id not in impls_by_provider_id: + print(f"{impls_by_provider_id=}") raise ValueError( f"Provider `{obj.provider_id}` pointed by `{obj.identifier}` not found" ) @@ -70,7 +71,7 @@ class CommonRoutingTableImpl(RoutingTable): def get_provider_impl(self, routing_key: str) -> Any: if routing_key not in self.routing_key_to_object: - raise ValueError(f"Object `{routing_key}` not registered") + raise ValueError(f"`{routing_key}` not registered") obj = self.routing_key_to_object[routing_key] if obj.provider_id not in self.impls_by_provider_id: @@ -86,7 +87,7 @@ class CommonRoutingTableImpl(RoutingTable): async def register_object(self, obj: RoutableObject): if obj.identifier in self.routing_key_to_object: - print(f"Object `{obj.identifier}` is already registered") + print(f"`{obj.identifier}` is already registered") return if not obj.provider_id: diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index 7b19f7996..5c1a7806d 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -11,13 +11,9 @@ import json import signal import traceback -from collections.abc import ( - AsyncGenerator as AsyncGeneratorABC, - AsyncIterator as AsyncIteratorABC, -) from contextlib import asynccontextmanager from ssl import SSLError -from typing import Any, AsyncGenerator, AsyncIterator, Dict, get_type_hints, Optional +from typing import Any, Dict, Optional import fire import httpx @@ -44,42 +40,13 @@ from llama_stack.distribution.resolver import resolve_impls_with_routing from .endpoints import get_all_api_endpoints -def is_async_iterator_type(typ): - if hasattr(typ, "__origin__"): - origin = typ.__origin__ - if isinstance(origin, type): - return issubclass( - origin, - (AsyncIterator, AsyncGenerator, AsyncIteratorABC, AsyncGeneratorABC), - ) - return False - return isinstance( - typ, (AsyncIterator, AsyncGenerator, AsyncIteratorABC, AsyncGeneratorABC) - ) - - -def create_sse_event(data: Any, **kwargs) -> str: +def create_sse_event(data: Any) -> str: if isinstance(data, BaseModel): data = data.json() else: data = json.dumps(data) - # !!FIX THIS ASAP!! grossest hack ever; not really SSE - # - # we use the return type of the function to determine if there's an AsyncGenerator - # and change the implementation to send SSE. unfortunately, chat_completion() takes a - # parameter called stream which _changes_ the return type. one correct way to fix this is: - # - # - have separate underlying functions for streaming and non-streaming because they need - # to operate differently anyhow - # - do a late binding of the return type based on the parameters passed in - if kwargs.get("stream", False): - return f"data: {data}\n\n" - else: - print( - f"!!FIX THIS ASAP!! 
Sending non-SSE event because client really is non-SSE: {data}" - ) - return data + return f"data: {data}\n\n" async def global_exception_handler(request: Request, exc: Exception): @@ -221,65 +188,56 @@ def create_dynamic_passthrough( return endpoint +def is_streaming_request(func_name: str, request: Request, **kwargs): + # TODO: pass the api method and punt it to the Protocol definition directly + return kwargs.get("stream", False) + + +async def sse_generator(event_gen): + try: + async for item in event_gen: + yield create_sse_event(item) + await asyncio.sleep(0.01) + except asyncio.CancelledError: + print("Generator cancelled") + await event_gen.aclose() + except Exception as e: + traceback.print_exception(e) + yield create_sse_event( + { + "error": { + "message": str(translate_exception(e)), + }, + } + ) + finally: + await end_trace() + + def create_dynamic_typed_route(func: Any, method: str): - hints = get_type_hints(func) - response_model = hints.get("return") - # NOTE: I think it is better to just add a method within each Api - # "Protocol" / adapter-impl to tell what sort of a response this request - # is going to produce. /chat_completion can produce a streaming or - # non-streaming response depending on if request.stream is True / False. - is_streaming = is_async_iterator_type(response_model) + async def endpoint(request: Request, **kwargs): + await start_trace(func.__name__) - if is_streaming: + set_request_provider_data(request.headers) - async def endpoint(request: Request, **kwargs): - await start_trace(func.__name__) - - set_request_provider_data(request.headers) - - async def sse_generator(event_gen): - try: - async for item in event_gen: - yield create_sse_event(item, **kwargs) - await asyncio.sleep(0.01) - except asyncio.CancelledError: - print("Generator cancelled") - await event_gen.aclose() - except Exception as e: - traceback.print_exception(e) - yield create_sse_event( - { - "error": { - "message": str(translate_exception(e)), - }, - } - ) - finally: - await end_trace() - - return StreamingResponse( - sse_generator(func(**kwargs)), media_type="text/event-stream" - ) - - else: - - async def endpoint(request: Request, **kwargs): - await start_trace(func.__name__) - - set_request_provider_data(request.headers) - - try: + is_streaming = is_streaming_request(func.__name__, request, **kwargs) + try: + if is_streaming: + return StreamingResponse( + sse_generator(func(**kwargs)), media_type="text/event-stream" + ) + else: return ( await func(**kwargs) if asyncio.iscoroutinefunction(func) else func(**kwargs) ) - except Exception as e: - traceback.print_exception(e) - raise translate_exception(e) from e - finally: - await end_trace() + except Exception as e: + traceback.print_exception(e) + raise translate_exception(e) from e + finally: + await end_trace() sig = inspect.signature(func) new_params = [ diff --git a/llama_stack/providers/datatypes.py b/llama_stack/providers/datatypes.py index 0c8f6ad21..44ecb5355 100644 --- a/llama_stack/providers/datatypes.py +++ b/llama_stack/providers/datatypes.py @@ -41,6 +41,9 @@ class ProviderSpec(BaseModel): description="Higher-level API surfaces may depend on other providers to provide their functionality", ) + # used internally by the resolver; this is a hack for now + deps__: List[str] = Field(default_factory=list) + class RoutingTable(Protocol): def get_provider_impl(self, routing_key: str) -> Any: ... 
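
For reference, a minimal standalone sketch of the SSE framing that the reworked server.py applies to every streamed event: the payload is JSON-encoded and wrapped as a "data: ...\n\n" frame, with no non-SSE fallback path. The names below (to_sse_event, fake_event_stream) are illustrative only and do not appear in the patch.

import asyncio
import json

def to_sse_event(data) -> str:
    # JSON-encode the payload and frame it as a Server-Sent Events "data:" line
    return f"data: {json.dumps(data)}\n\n"

async def fake_event_stream():
    # stand-in for the per-request async generator the server wraps
    for i in range(3):
        yield {"step": i}
        await asyncio.sleep(0)

async def main():
    async for item in fake_event_stream():
        print(to_sse_event(item), end="")

asyncio.run(main())
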
diff --git a/llama_stack/providers/impls/meta_reference/agents/agent_instance.py b/llama_stack/providers/impls/meta_reference/agents/agent_instance.py index 661da10cc..fca335bf5 100644 --- a/llama_stack/providers/impls/meta_reference/agents/agent_instance.py +++ b/llama_stack/providers/impls/meta_reference/agents/agent_instance.py @@ -144,6 +144,8 @@ class ChatAgent(ShieldRunnerMixin): async def create_and_execute_turn( self, request: AgentTurnCreateRequest ) -> AsyncGenerator: + assert request.stream is True, "Non-streaming not supported" + session_info = await self.storage.get_session_info(request.session_id) if session_info is None: raise ValueError(f"Session {request.session_id} not found") @@ -635,14 +637,13 @@ class ChatAgent(ShieldRunnerMixin): raise ValueError(f"Session {session_id} not found") if session_info.memory_bank_id is None: - memory_bank = await self.memory_api.create_memory_bank( - name=f"memory_bank_{session_id}", - config=VectorMemoryBankConfig( - embedding_model="all-MiniLM-L6-v2", - chunk_size_in_tokens=512, - ), + bank_id = f"memory_bank_{session_id}" + memory_bank = VectorMemoryBankDef( + identifier=bank_id, + embedding_model="all-MiniLM-L6-v2", + chunk_size_in_tokens=512, ) - bank_id = memory_bank.bank_id + await self.memory_api.register_memory_bank(memory_bank) await self.storage.add_memory_bank_to_session(session_id, bank_id) else: bank_id = session_info.memory_bank_id diff --git a/llama_stack/providers/impls/meta_reference/agents/agents.py b/llama_stack/providers/impls/meta_reference/agents/agents.py index 0673cd16f..e6fa1744d 100644 --- a/llama_stack/providers/impls/meta_reference/agents/agents.py +++ b/llama_stack/providers/impls/meta_reference/agents/agents.py @@ -100,7 +100,7 @@ class MetaReferenceAgentsImpl(Agents): session_id=session_id, ) - async def create_agent_turn( + def create_agent_turn( self, agent_id: str, session_id: str, @@ -113,16 +113,22 @@ class MetaReferenceAgentsImpl(Agents): attachments: Optional[List[Attachment]] = None, stream: Optional[bool] = False, ) -> AsyncGenerator: - agent = await self.get_agent(agent_id) - - # wrapper request to make it easier to pass around (internal only, not exposed to API) request = AgentTurnCreateRequest( agent_id=agent_id, session_id=session_id, messages=messages, attachments=attachments, - stream=stream, + stream=True, ) + if stream: + return self._create_agent_turn_streaming(request) + else: + raise NotImplementedError("Non-streaming agent turns not yet implemented") + async def _create_agent_turn_streaming( + self, + request: AgentTurnCreateRequest, + ) -> AsyncGenerator: + agent = await self.get_agent(request.agent_id) async for event in agent.create_and_execute_turn(request): yield event diff --git a/llama_stack/providers/tests/agents/__init__.py b/llama_stack/providers/tests/agents/__init__.py new file mode 100644 index 000000000..756f351d8 --- /dev/null +++ b/llama_stack/providers/tests/agents/__init__.py @@ -0,0 +1,5 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
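
For reference, a minimal standalone sketch of the dispatch pattern this patch adopts for create_agent_turn and the server routes: the public method is a plain `def` that returns either an async generator (stream=True) or an awaitable (stream=False), and the caller decides whether to `async for` or `await`. All names below (TurnService, create_turn, etc.) are hypothetical and not part of the Llama Stack API.

import asyncio
from typing import AsyncGenerator

class TurnService:
    def create_turn(self, prompt: str, stream: bool = False):
        # plain `def`: hand back an async generator when streaming,
        # otherwise a coroutine object the caller awaits
        if stream:
            return self._create_turn_streaming(prompt)
        return self._create_turn_non_streaming(prompt)

    async def _create_turn_streaming(self, prompt: str) -> AsyncGenerator[str, None]:
        for token in prompt.split():
            yield token
            await asyncio.sleep(0)

    async def _create_turn_non_streaming(self, prompt: str) -> str:
        return prompt.upper()

async def main():
    svc = TurnService()
    async for token in svc.create_turn("hello streaming world", stream=True):
        print(token)
    print(await svc.create_turn("hello", stream=False))

asyncio.run(main())

Keeping the stream/non-stream branch at this outer layer is what lets the patch drop the earlier return-type introspection hack (is_async_iterator_type and the "!!FIX THIS ASAP!!" path) that server.py removes above.
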
From 380b9dab9006c9d503e52af50fddc7340af52873 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 8 Oct 2024 15:19:19 -0700 Subject: [PATCH 61/69] regen openapi specs --- docs/resources/llama-stack-spec.html | 1083 ++++++++++++-------------- docs/resources/llama-stack-spec.yaml | 623 +++++++-------- 2 files changed, 786 insertions(+), 920 deletions(-) diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html index 0d06ce03d..21d356f62 100644 --- a/docs/resources/llama-stack-spec.html +++ b/docs/resources/llama-stack-spec.html @@ -21,7 +21,7 @@ "info": { "title": "[DRAFT] Llama Stack Specification", "version": "0.0.1", - "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-10-02 15:40:53.008257" + "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-10-08 15:18:57.600111" }, "servers": [ { @@ -422,46 +422,6 @@ } } }, - "/memory/create": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/MemoryBank" - } - } - } - } - }, - "tags": [ - "Memory" - ], - "parameters": [ - { - "name": "X-LlamaStack-ProviderData", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CreateMemoryBankRequest" - } - } - }, - "required": true - } - } - }, "/agents/delete": { "post": { "responses": { @@ -594,46 +554,6 @@ } } }, - "/memory/drop": { - "post": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "type": "string" - } - } - } - } - }, - "tags": [ - "Memory" - ], - "parameters": [ - { - "name": "X-LlamaStack-ProviderData", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/DropMemoryBankRequest" - } - } - }, - "required": true - } - } - }, "/inference/embeddings": { "post": { "responses": { @@ -1180,7 +1100,7 @@ ] } }, - "/memory/get": { + "/memory_banks/get": { "get": { "responses": { "200": { @@ -1190,7 +1110,20 @@ "schema": { "oneOf": [ { - "$ref": "#/components/schemas/MemoryBank" + "oneOf": [ + { + "$ref": "#/components/schemas/VectorMemoryBankDef" + }, + { + "$ref": "#/components/schemas/KeyValueMemoryBankDef" + }, + { + "$ref": "#/components/schemas/KeywordMemoryBankDef" + }, + { + "$ref": "#/components/schemas/GraphMemoryBankDef" + } + ] }, { "type": "null" @@ -1202,11 +1135,11 @@ } }, "tags": [ - "Memory" + "MemoryBanks" ], "parameters": [ { - "name": "bank_id", + "name": "identifier", "in": "query", "required": true, "schema": { @@ -1235,7 +1168,7 @@ "schema": { "oneOf": [ { - "$ref": "#/components/schemas/ModelServingSpec" + "$ref": "#/components/schemas/ModelDef" }, { "type": "null" @@ -1251,7 +1184,7 @@ ], 
"parameters": [ { - "name": "core_model_id", + "name": "identifier", "in": "query", "required": true, "schema": { @@ -1270,51 +1203,6 @@ ] } }, - "/memory_banks/get": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/json": { - "schema": { - "oneOf": [ - { - "$ref": "#/components/schemas/MemoryBankSpec" - }, - { - "type": "null" - } - ] - } - } - } - } - }, - "tags": [ - "MemoryBanks" - ], - "parameters": [ - { - "name": "bank_type", - "in": "query", - "required": true, - "schema": { - "$ref": "#/components/schemas/MemoryBankType" - } - }, - { - "name": "X-LlamaStack-ProviderData", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - } - ] - } - }, "/shields/get": { "get": { "responses": { @@ -1325,7 +1213,49 @@ "schema": { "oneOf": [ { - "$ref": "#/components/schemas/ShieldSpec" + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "type": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "type", + "params" + ] }, { "type": "null" @@ -1613,7 +1543,20 @@ "content": { "application/jsonl": { "schema": { - "$ref": "#/components/schemas/MemoryBankSpec" + "oneOf": [ + { + "$ref": "#/components/schemas/VectorMemoryBankDef" + }, + { + "$ref": "#/components/schemas/KeyValueMemoryBankDef" + }, + { + "$ref": "#/components/schemas/KeywordMemoryBankDef" + }, + { + "$ref": "#/components/schemas/GraphMemoryBankDef" + } + ] } } } @@ -1635,36 +1578,6 @@ ] } }, - "/memory/list": { - "get": { - "responses": { - "200": { - "description": "OK", - "content": { - "application/jsonl": { - "schema": { - "$ref": "#/components/schemas/MemoryBank" - } - } - } - } - }, - "tags": [ - "Memory" - ], - "parameters": [ - { - "name": "X-LlamaStack-ProviderData", - "in": "header", - "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", - "required": false, - "schema": { - "type": "string" - } - } - ] - } - }, "/models/list": { "get": { "responses": { @@ -1673,7 +1586,7 @@ "content": { "application/jsonl": { "schema": { - "$ref": "#/components/schemas/ModelServingSpec" + "$ref": "#/components/schemas/ModelDef" } } } @@ -1772,7 +1685,7 @@ "content": { "application/jsonl": { "schema": { - "$ref": "#/components/schemas/ShieldSpec" + "$ref": "#/components/schemas/ShieldDef" } } } @@ -1907,6 +1820,105 @@ } } }, + "/memory_banks/register": { + "post": { + "responses": { + "200": { + "description": "OK" + } + }, + "tags": [ + "Memory" + ], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RegisterMemoryBankRequest" + } + } + }, + "required": true + } + } + }, + "/inference/register_model": { + "post": { + "responses": { + "200": { + "description": "OK" + } + }, + "tags": [ + "Models" + ], + "parameters": [ + { + "name": 
"X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RegisterModelRequest" + } + } + }, + "required": true + } + } + }, + "/safety/register_shield": { + "post": { + "responses": { + "200": { + "description": "OK" + } + }, + "tags": [ + "Shields" + ], + "parameters": [ + { + "name": "X-LlamaStack-ProviderData", + "in": "header", + "description": "JSON-encoded provider data which will be made available to the adapter servicing the API", + "required": false, + "schema": { + "type": "string" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/RegisterShieldRequest" + } + } + }, + "required": true + } + } + }, "/reward_scoring/score": { "post": { "responses": { @@ -4305,184 +4317,6 @@ "dataset" ] }, - "CreateMemoryBankRequest": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "config": { - "oneOf": [ - { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "vector", - "default": "vector" - }, - "embedding_model": { - "type": "string" - }, - "chunk_size_in_tokens": { - "type": "integer" - }, - "overlap_size_in_tokens": { - "type": "integer" - } - }, - "additionalProperties": false, - "required": [ - "type", - "embedding_model", - "chunk_size_in_tokens" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "keyvalue", - "default": "keyvalue" - } - }, - "additionalProperties": false, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "keyword", - "default": "keyword" - } - }, - "additionalProperties": false, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "graph", - "default": "graph" - } - }, - "additionalProperties": false, - "required": [ - "type" - ] - } - ] - }, - "url": { - "$ref": "#/components/schemas/URL" - } - }, - "additionalProperties": false, - "required": [ - "name", - "config" - ] - }, - "MemoryBank": { - "type": "object", - "properties": { - "bank_id": { - "type": "string" - }, - "name": { - "type": "string" - }, - "config": { - "oneOf": [ - { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "vector", - "default": "vector" - }, - "embedding_model": { - "type": "string" - }, - "chunk_size_in_tokens": { - "type": "integer" - }, - "overlap_size_in_tokens": { - "type": "integer" - } - }, - "additionalProperties": false, - "required": [ - "type", - "embedding_model", - "chunk_size_in_tokens" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "keyvalue", - "default": "keyvalue" - } - }, - "additionalProperties": false, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "keyword", - "default": "keyword" - } - }, - "additionalProperties": false, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "const": "graph", - "default": "graph" - } - }, - "additionalProperties": false, - "required": [ - "type" - ] - } - ] - }, - "url": { - "$ref": "#/components/schemas/URL" - } - }, - "additionalProperties": false, - "required": [ - 
"bank_id", - "name", - "config" - ] - }, "DeleteAgentsRequest": { "type": "object", "properties": { @@ -4542,18 +4376,6 @@ "document_ids" ] }, - "DropMemoryBankRequest": { - "type": "object", - "properties": { - "bank_id": { - "type": "string" - } - }, - "additionalProperties": false, - "required": [ - "bank_id" - ] - }, "EmbeddingsRequest": { "type": "object", "properties": { @@ -4693,6 +4515,69 @@ }, "additionalProperties": false }, + "GraphMemoryBankDef": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "const": "graph", + "default": "graph" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "type" + ] + }, + "KeyValueMemoryBankDef": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "const": "keyvalue", + "default": "keyvalue" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "type" + ] + }, + "KeywordMemoryBankDef": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "const": "keyword", + "default": "keyword" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "type" + ] + }, "Session": { "type": "object", "properties": { @@ -4713,7 +4598,20 @@ "format": "date-time" }, "memory_bank": { - "$ref": "#/components/schemas/MemoryBank" + "oneOf": [ + { + "$ref": "#/components/schemas/VectorMemoryBankDef" + }, + { + "$ref": "#/components/schemas/KeyValueMemoryBankDef" + }, + { + "$ref": "#/components/schemas/KeywordMemoryBankDef" + }, + { + "$ref": "#/components/schemas/GraphMemoryBankDef" + } + ] } }, "additionalProperties": false, @@ -4725,6 +4623,38 @@ ], "title": "A single session of an interaction with an Agentic System." }, + "VectorMemoryBankDef": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "type": { + "type": "string", + "const": "vector", + "default": "vector" + }, + "embedding_model": { + "type": "string" + }, + "chunk_size_in_tokens": { + "type": "integer" + }, + "overlap_size_in_tokens": { + "type": "integer" + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "type", + "embedding_model", + "chunk_size_in_tokens" + ] + }, "AgentStepResponse": { "type": "object", "properties": { @@ -4870,169 +4800,23 @@ "job_uuid" ] }, - "Model": { - "description": "The model family and SKU of the model along with other parameters corresponding to the model." 
- }, - "ModelServingSpec": { + "ModelDef": { "type": "object", "properties": { - "llama_model": { - "$ref": "#/components/schemas/Model" - }, - "provider_config": { - "type": "object", - "properties": { - "provider_type": { - "type": "string" - }, - "config": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "provider_type", - "config" - ] - } - }, - "additionalProperties": false, - "required": [ - "llama_model", - "provider_config" - ] - }, - "MemoryBankType": { - "type": "string", - "enum": [ - "vector", - "keyvalue", - "keyword", - "graph" - ] - }, - "MemoryBankSpec": { - "type": "object", - "properties": { - "bank_type": { - "$ref": "#/components/schemas/MemoryBankType" - }, - "provider_config": { - "type": "object", - "properties": { - "provider_type": { - "type": "string" - }, - "config": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "provider_type", - "config" - ] - } - }, - "additionalProperties": false, - "required": [ - "bank_type", - "provider_config" - ] - }, - "ShieldSpec": { - "type": "object", - "properties": { - "shield_type": { + "identifier": { "type": "string" }, - "provider_config": { - "type": "object", - "properties": { - "provider_type": { - "type": "string" - }, - "config": { - "type": "object", - "additionalProperties": { - "oneOf": [ - { - "type": "null" - }, - { - "type": "boolean" - }, - { - "type": "number" - }, - { - "type": "string" - }, - { - "type": "array" - }, - { - "type": "object" - } - ] - } - } - }, - "additionalProperties": false, - "required": [ - "provider_type", - "config" - ] + "llama_model": { + "type": "string" + }, + "provider_id": { + "type": "string" } }, "additionalProperties": false, "required": [ - "shield_type", - "provider_config" + "identifier", + "llama_model" ] }, "Trace": { @@ -5222,17 +5006,17 @@ "ProviderInfo": { "type": "object", "properties": { - "provider_type": { + "provider_id": { "type": "string" }, - "description": { + "provider_type": { "type": "string" } }, "additionalProperties": false, "required": [ - "provider_type", - "description" + "provider_id", + "provider_type" ] }, "RouteInfo": { @@ -5244,7 +5028,7 @@ "method": { "type": "string" }, - "providers": { + "provider_types": { "type": "array", "items": { "type": "string" @@ -5255,7 +5039,52 @@ "required": [ "route", "method", - "providers" + "provider_types" + ] + }, + "ShieldDef": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "type": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "type", + "params" ] }, "LogSeverity": { @@ -5838,6 +5667,97 @@ "scores" ] }, + "RegisterMemoryBankRequest": { + "type": "object", + "properties": { + "memory_bank": { + "oneOf": [ + { + "$ref": 
"#/components/schemas/VectorMemoryBankDef" + }, + { + "$ref": "#/components/schemas/KeyValueMemoryBankDef" + }, + { + "$ref": "#/components/schemas/KeywordMemoryBankDef" + }, + { + "$ref": "#/components/schemas/GraphMemoryBankDef" + } + ] + } + }, + "additionalProperties": false, + "required": [ + "memory_bank" + ] + }, + "RegisterModelRequest": { + "type": "object", + "properties": { + "model": { + "$ref": "#/components/schemas/ModelDef" + } + }, + "additionalProperties": false, + "required": [ + "model" + ] + }, + "RegisterShieldRequest": { + "type": "object", + "properties": { + "shield": { + "type": "object", + "properties": { + "identifier": { + "type": "string" + }, + "type": { + "type": "string" + }, + "provider_id": { + "type": "string" + }, + "params": { + "type": "object", + "additionalProperties": { + "oneOf": [ + { + "type": "null" + }, + { + "type": "boolean" + }, + { + "type": "number" + }, + { + "type": "string" + }, + { + "type": "array" + }, + { + "type": "object" + } + ] + } + } + }, + "additionalProperties": false, + "required": [ + "identifier", + "type", + "params" + ] + } + }, + "additionalProperties": false, + "required": [ + "shield" + ] + }, "DialogGenerations": { "type": "object", "properties": { @@ -6369,50 +6289,50 @@ } ], "tags": [ - { - "name": "Datasets" - }, - { - "name": "Inspect" - }, - { - "name": "Memory" - }, { "name": "BatchInference" }, { - "name": "Agents" + "name": "Datasets" }, { "name": "Inference" }, { - "name": "Shields" + "name": "Evaluations" }, { - "name": "SyntheticDataGeneration" - }, - { - "name": "Models" - }, - { - "name": "RewardScoring" - }, - { - "name": "MemoryBanks" + "name": "Memory" }, { "name": "Safety" }, { - "name": "Evaluations" + "name": "PostTraining" + }, + { + "name": "MemoryBanks" + }, + { + "name": "Models" + }, + { + "name": "Shields" + }, + { + "name": "Inspect" + }, + { + "name": "SyntheticDataGeneration" }, { "name": "Telemetry" }, { - "name": "PostTraining" + "name": "Agents" + }, + { + "name": "RewardScoring" }, { "name": "BuiltinTool", @@ -6674,14 +6594,6 @@ "name": "CreateDatasetRequest", "description": "" }, - { - "name": "CreateMemoryBankRequest", - "description": "" - }, - { - "name": "MemoryBank", - "description": "" - }, { "name": "DeleteAgentsRequest", "description": "" @@ -6698,10 +6610,6 @@ "name": "DeleteDocumentsRequest", "description": "" }, - { - "name": "DropMemoryBankRequest", - "description": "" - }, { "name": "EmbeddingsRequest", "description": "" @@ -6730,10 +6638,26 @@ "name": "GetAgentsSessionRequest", "description": "" }, + { + "name": "GraphMemoryBankDef", + "description": "" + }, + { + "name": "KeyValueMemoryBankDef", + "description": "" + }, + { + "name": "KeywordMemoryBankDef", + "description": "" + }, { "name": "Session", "description": "A single session of an interaction with an Agentic System.\n\n" }, + { + "name": "VectorMemoryBankDef", + "description": "" + }, { "name": "AgentStepResponse", "description": "" @@ -6759,24 +6683,8 @@ "description": "" }, { - "name": "Model", - "description": "The model family and SKU of the model along with other parameters corresponding to the model.\n\n" - }, - { - "name": "ModelServingSpec", - "description": "" - }, - { - "name": "MemoryBankType", - "description": "" - }, - { - "name": "MemoryBankSpec", - "description": "" - }, - { - "name": "ShieldSpec", - "description": "" + "name": "ModelDef", + "description": "" }, { "name": "Trace", @@ -6822,6 +6730,10 @@ "name": "RouteInfo", "description": "" }, + { + "name": "ShieldDef", + "description": "" + 
}, { "name": "LogSeverity", "description": "" @@ -6882,6 +6794,18 @@ "name": "QueryDocumentsResponse", "description": "" }, + { + "name": "RegisterMemoryBankRequest", + "description": "" + }, + { + "name": "RegisterModelRequest", + "description": "" + }, + { + "name": "RegisterShieldRequest", + "description": "" + }, { "name": "DialogGenerations", "description": "" @@ -7001,7 +6925,6 @@ "CreateAgentSessionRequest", "CreateAgentTurnRequest", "CreateDatasetRequest", - "CreateMemoryBankRequest", "DPOAlignmentConfig", "DeleteAgentsRequest", "DeleteAgentsSessionRequest", @@ -7009,7 +6932,6 @@ "DeleteDocumentsRequest", "DialogGenerations", "DoraFinetuningConfig", - "DropMemoryBankRequest", "EmbeddingsRequest", "EmbeddingsResponse", "EvaluateQuestionAnsweringRequest", @@ -7023,22 +6945,21 @@ "FunctionCallToolDefinition", "GetAgentsSessionRequest", "GetDocumentsRequest", + "GraphMemoryBankDef", "HealthInfo", "ImageMedia", "InferenceStep", "InsertDocumentsRequest", + "KeyValueMemoryBankDef", + "KeywordMemoryBankDef", "LogEventRequest", "LogSeverity", "LoraFinetuningConfig", - "MemoryBank", "MemoryBankDocument", - "MemoryBankSpec", - "MemoryBankType", "MemoryRetrievalStep", "MemoryToolDefinition", "MetricEvent", - "Model", - "ModelServingSpec", + "ModelDef", "OptimizerConfig", "PhotogenToolDefinition", "PostTrainingJob", @@ -7052,6 +6973,9 @@ "QueryDocumentsRequest", "QueryDocumentsResponse", "RLHFAlgorithm", + "RegisterMemoryBankRequest", + "RegisterModelRequest", + "RegisterShieldRequest", "RestAPIExecutionConfig", "RestAPIMethod", "RewardScoreRequest", @@ -7067,7 +6991,7 @@ "SearchToolDefinition", "Session", "ShieldCallStep", - "ShieldSpec", + "ShieldDef", "SpanEndPayload", "SpanStartPayload", "SpanStatus", @@ -7097,6 +7021,7 @@ "UnstructuredLogEvent", "UpdateDocumentsRequest", "UserMessage", + "VectorMemoryBankDef", "ViolationLevel", "WolframAlphaToolDefinition" ] diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml index 317d1ee33..3f1bcc7c7 100644 --- a/docs/resources/llama-stack-spec.yaml +++ b/docs/resources/llama-stack-spec.yaml @@ -580,63 +580,6 @@ components: - uuid - dataset type: object - CreateMemoryBankRequest: - additionalProperties: false - properties: - config: - oneOf: - - additionalProperties: false - properties: - chunk_size_in_tokens: - type: integer - embedding_model: - type: string - overlap_size_in_tokens: - type: integer - type: - const: vector - default: vector - type: string - required: - - type - - embedding_model - - chunk_size_in_tokens - type: object - - additionalProperties: false - properties: - type: - const: keyvalue - default: keyvalue - type: string - required: - - type - type: object - - additionalProperties: false - properties: - type: - const: keyword - default: keyword - type: string - required: - - type - type: object - - additionalProperties: false - properties: - type: - const: graph - default: graph - type: string - required: - - type - type: object - name: - type: string - url: - $ref: '#/components/schemas/URL' - required: - - name - - config - type: object DPOAlignmentConfig: additionalProperties: false properties: @@ -739,14 +682,6 @@ components: - rank - alpha type: object - DropMemoryBankRequest: - additionalProperties: false - properties: - bank_id: - type: string - required: - - bank_id - type: object EmbeddingsRequest: additionalProperties: false properties: @@ -908,6 +843,21 @@ components: required: - document_ids type: object + GraphMemoryBankDef: + additionalProperties: false + properties: + identifier: + 
type: string + provider_id: + type: string + type: + const: graph + default: graph + type: string + required: + - identifier + - type + type: object HealthInfo: additionalProperties: false properties: @@ -973,6 +923,36 @@ components: - bank_id - documents type: object + KeyValueMemoryBankDef: + additionalProperties: false + properties: + identifier: + type: string + provider_id: + type: string + type: + const: keyvalue + default: keyvalue + type: string + required: + - identifier + - type + type: object + KeywordMemoryBankDef: + additionalProperties: false + properties: + identifier: + type: string + provider_id: + type: string + type: + const: keyword + default: keyword + type: string + required: + - identifier + - type + type: object LogEventRequest: additionalProperties: false properties: @@ -1015,66 +995,6 @@ components: - rank - alpha type: object - MemoryBank: - additionalProperties: false - properties: - bank_id: - type: string - config: - oneOf: - - additionalProperties: false - properties: - chunk_size_in_tokens: - type: integer - embedding_model: - type: string - overlap_size_in_tokens: - type: integer - type: - const: vector - default: vector - type: string - required: - - type - - embedding_model - - chunk_size_in_tokens - type: object - - additionalProperties: false - properties: - type: - const: keyvalue - default: keyvalue - type: string - required: - - type - type: object - - additionalProperties: false - properties: - type: - const: keyword - default: keyword - type: string - required: - - type - type: object - - additionalProperties: false - properties: - type: - const: graph - default: graph - type: string - required: - - type - type: object - name: - type: string - url: - $ref: '#/components/schemas/URL' - required: - - bank_id - - name - - config - type: object MemoryBankDocument: additionalProperties: false properties: @@ -1107,41 +1027,6 @@ components: - content - metadata type: object - MemoryBankSpec: - additionalProperties: false - properties: - bank_type: - $ref: '#/components/schemas/MemoryBankType' - provider_config: - additionalProperties: false - properties: - config: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - provider_type: - type: string - required: - - provider_type - - config - type: object - required: - - bank_type - - provider_config - type: object - MemoryBankType: - enum: - - vector - - keyvalue - - keyword - - graph - type: string MemoryRetrievalStep: additionalProperties: false properties: @@ -1349,36 +1234,18 @@ components: - value - unit type: object - Model: - description: The model family and SKU of the model along with other parameters - corresponding to the model. 
- ModelServingSpec: + ModelDef: additionalProperties: false properties: + identifier: + type: string llama_model: - $ref: '#/components/schemas/Model' - provider_config: - additionalProperties: false - properties: - config: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - provider_type: - type: string - required: - - provider_type - - config - type: object + type: string + provider_id: + type: string required: + - identifier - llama_model - - provider_config type: object OptimizerConfig: additionalProperties: false @@ -1554,13 +1421,13 @@ components: ProviderInfo: additionalProperties: false properties: - description: + provider_id: type: string provider_type: type: string required: + - provider_id - provider_type - - description type: object QLoraFinetuningConfig: additionalProperties: false @@ -1650,6 +1517,56 @@ components: enum: - dpo type: string + RegisterMemoryBankRequest: + additionalProperties: false + properties: + memory_bank: + oneOf: + - $ref: '#/components/schemas/VectorMemoryBankDef' + - $ref: '#/components/schemas/KeyValueMemoryBankDef' + - $ref: '#/components/schemas/KeywordMemoryBankDef' + - $ref: '#/components/schemas/GraphMemoryBankDef' + required: + - memory_bank + type: object + RegisterModelRequest: + additionalProperties: false + properties: + model: + $ref: '#/components/schemas/ModelDef' + required: + - model + type: object + RegisterShieldRequest: + additionalProperties: false + properties: + shield: + additionalProperties: false + properties: + identifier: + type: string + params: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + provider_id: + type: string + type: + type: string + required: + - identifier + - type + - params + type: object + required: + - shield + type: object RestAPIExecutionConfig: additionalProperties: false properties: @@ -1728,7 +1645,7 @@ components: properties: method: type: string - providers: + provider_types: items: type: string type: array @@ -1737,7 +1654,7 @@ components: required: - route - method - - providers + - provider_types type: object RunShieldRequest: additionalProperties: false @@ -1892,7 +1809,11 @@ components: additionalProperties: false properties: memory_bank: - $ref: '#/components/schemas/MemoryBank' + oneOf: + - $ref: '#/components/schemas/VectorMemoryBankDef' + - $ref: '#/components/schemas/KeyValueMemoryBankDef' + - $ref: '#/components/schemas/KeywordMemoryBankDef' + - $ref: '#/components/schemas/GraphMemoryBankDef' session_id: type: string session_name: @@ -1935,33 +1856,29 @@ components: - step_id - step_type type: object - ShieldSpec: + ShieldDef: additionalProperties: false properties: - provider_config: - additionalProperties: false - properties: - config: - additionalProperties: - oneOf: - - type: 'null' - - type: boolean - - type: number - - type: string - - type: array - - type: object - type: object - provider_type: - type: string - required: - - provider_type - - config + identifier: + type: string + params: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object type: object - shield_type: + provider_id: + type: string + type: type: string required: - - shield_type - - provider_config + - identifier + - type + - params type: object SpanEndPayload: additionalProperties: false @@ -2571,6 +2488,29 @@ components: - role - 
content type: object + VectorMemoryBankDef: + additionalProperties: false + properties: + chunk_size_in_tokens: + type: integer + embedding_model: + type: string + identifier: + type: string + overlap_size_in_tokens: + type: integer + provider_id: + type: string + type: + const: vector + default: vector + type: string + required: + - identifier + - type + - embedding_model + - chunk_size_in_tokens + type: object ViolationLevel: enum: - info @@ -2604,7 +2544,7 @@ info: description: "This is the specification of the llama stack that provides\n \ \ a set of endpoints and their corresponding interfaces that are tailored\ \ to\n best leverage Llama Models. The specification is still in\ - \ draft and subject to change.\n Generated at 2024-10-02 15:40:53.008257" + \ draft and subject to change.\n Generated at 2024-10-08 15:18:57.600111" title: '[DRAFT] Llama Stack Specification' version: 0.0.1 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema @@ -3226,7 +3166,7 @@ paths: description: OK tags: - Inference - /memory/create: + /inference/register_model: post: parameters: - description: JSON-encoded provider data which will be made available to the @@ -3240,17 +3180,13 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/CreateMemoryBankRequest' + $ref: '#/components/schemas/RegisterModelRequest' required: true responses: '200': - content: - application/json: - schema: - $ref: '#/components/schemas/MemoryBank' description: OK tags: - - Memory + - Models /memory/documents/delete: post: parameters: @@ -3302,57 +3238,6 @@ paths: description: OK tags: - Memory - /memory/drop: - post: - parameters: - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-ProviderData - required: false - schema: - type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/DropMemoryBankRequest' - required: true - responses: - '200': - content: - application/json: - schema: - type: string - description: OK - tags: - - Memory - /memory/get: - get: - parameters: - - in: query - name: bank_id - required: true - schema: - type: string - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-ProviderData - required: false - schema: - type: string - responses: - '200': - content: - application/json: - schema: - oneOf: - - $ref: '#/components/schemas/MemoryBank' - - type: 'null' - description: OK - tags: - - Memory /memory/insert: post: parameters: @@ -3374,25 +3259,6 @@ paths: description: OK tags: - Memory - /memory/list: - get: - parameters: - - description: JSON-encoded provider data which will be made available to the - adapter servicing the API - in: header - name: X-LlamaStack-ProviderData - required: false - schema: - type: string - responses: - '200': - content: - application/jsonl: - schema: - $ref: '#/components/schemas/MemoryBank' - description: OK - tags: - - Memory /memory/query: post: parameters: @@ -3443,10 +3309,10 @@ paths: get: parameters: - in: query - name: bank_type + name: identifier required: true schema: - $ref: '#/components/schemas/MemoryBankType' + type: string - description: JSON-encoded provider data which will be made available to the adapter servicing the API in: header @@ -3460,7 +3326,11 @@ paths: application/json: schema: oneOf: - - $ref: '#/components/schemas/MemoryBankSpec' + - oneOf: + - $ref: '#/components/schemas/VectorMemoryBankDef' + - $ref: 
'#/components/schemas/KeyValueMemoryBankDef' + - $ref: '#/components/schemas/KeywordMemoryBankDef' + - $ref: '#/components/schemas/GraphMemoryBankDef' - type: 'null' description: OK tags: @@ -3480,15 +3350,40 @@ paths: content: application/jsonl: schema: - $ref: '#/components/schemas/MemoryBankSpec' + oneOf: + - $ref: '#/components/schemas/VectorMemoryBankDef' + - $ref: '#/components/schemas/KeyValueMemoryBankDef' + - $ref: '#/components/schemas/KeywordMemoryBankDef' + - $ref: '#/components/schemas/GraphMemoryBankDef' description: OK tags: - MemoryBanks + /memory_banks/register: + post: + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterMemoryBankRequest' + required: true + responses: + '200': + description: OK + tags: + - Memory /models/get: get: parameters: - in: query - name: core_model_id + name: identifier required: true schema: type: string @@ -3505,7 +3400,7 @@ paths: application/json: schema: oneOf: - - $ref: '#/components/schemas/ModelServingSpec' + - $ref: '#/components/schemas/ModelDef' - type: 'null' description: OK tags: @@ -3525,7 +3420,7 @@ paths: content: application/jsonl: schema: - $ref: '#/components/schemas/ModelServingSpec' + $ref: '#/components/schemas/ModelDef' description: OK tags: - Models @@ -3760,6 +3655,27 @@ paths: description: OK tags: - Inspect + /safety/register_shield: + post: + parameters: + - description: JSON-encoded provider data which will be made available to the + adapter servicing the API + in: header + name: X-LlamaStack-ProviderData + required: false + schema: + type: string + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/RegisterShieldRequest' + required: true + responses: + '200': + description: OK + tags: + - Shields /safety/run_shield: post: parameters: @@ -3806,7 +3722,29 @@ paths: application/json: schema: oneOf: - - $ref: '#/components/schemas/ShieldSpec' + - additionalProperties: false + properties: + identifier: + type: string + params: + additionalProperties: + oneOf: + - type: 'null' + - type: boolean + - type: number + - type: string + - type: array + - type: object + type: object + provider_id: + type: string + type: + type: string + required: + - identifier + - type + - params + type: object - type: 'null' description: OK tags: @@ -3826,7 +3764,7 @@ paths: content: application/jsonl: schema: - $ref: '#/components/schemas/ShieldSpec' + $ref: '#/components/schemas/ShieldDef' description: OK tags: - Shields @@ -3905,21 +3843,21 @@ security: servers: - url: http://any-hosted-llama-stack.com tags: -- name: Datasets -- name: Inspect -- name: Memory - name: BatchInference -- name: Agents +- name: Datasets - name: Inference -- name: Shields -- name: SyntheticDataGeneration -- name: Models -- name: RewardScoring -- name: MemoryBanks -- name: Safety - name: Evaluations -- name: Telemetry +- name: Memory +- name: Safety - name: PostTraining +- name: MemoryBanks +- name: Models +- name: Shields +- name: Inspect +- name: SyntheticDataGeneration +- name: Telemetry +- name: Agents +- name: RewardScoring - description: name: BuiltinTool - description: name: CreateDatasetRequest -- description: - name: CreateMemoryBankRequest -- description: - name: MemoryBank - description: name: DeleteAgentsRequest @@ -4140,9 +4073,6 @@ tags: - description: name: 
DeleteDocumentsRequest -- description: - name: DropMemoryBankRequest - description: name: EmbeddingsRequest @@ -4163,11 +4093,23 @@ tags: - description: name: GetAgentsSessionRequest +- description: + name: GraphMemoryBankDef +- description: + name: KeyValueMemoryBankDef +- description: + name: KeywordMemoryBankDef - description: 'A single session of an interaction with an Agentic System. ' name: Session +- description: + name: VectorMemoryBankDef - description: name: AgentStepResponse @@ -4189,21 +4131,8 @@ tags: - description: name: EvaluationJobStatusResponse -- description: 'The model family and SKU of the model along with other parameters - corresponding to the model. - - - ' - name: Model -- description: - name: ModelServingSpec -- description: - name: MemoryBankType -- description: - name: MemoryBankSpec -- description: - name: ShieldSpec +- description: + name: ModelDef - description: name: Trace - description: 'Checkpoint created during training runs @@ -4243,6 +4172,8 @@ tags: name: ProviderInfo - description: name: RouteInfo +- description: + name: ShieldDef - description: name: LogSeverity - description: @@ -4282,6 +4213,15 @@ tags: - description: name: QueryDocumentsResponse +- description: + name: RegisterMemoryBankRequest +- description: + name: RegisterModelRequest +- description: + name: RegisterShieldRequest - description: name: DialogGenerations @@ -4387,7 +4327,6 @@ x-tagGroups: - CreateAgentSessionRequest - CreateAgentTurnRequest - CreateDatasetRequest - - CreateMemoryBankRequest - DPOAlignmentConfig - DeleteAgentsRequest - DeleteAgentsSessionRequest @@ -4395,7 +4334,6 @@ x-tagGroups: - DeleteDocumentsRequest - DialogGenerations - DoraFinetuningConfig - - DropMemoryBankRequest - EmbeddingsRequest - EmbeddingsResponse - EvaluateQuestionAnsweringRequest @@ -4409,22 +4347,21 @@ x-tagGroups: - FunctionCallToolDefinition - GetAgentsSessionRequest - GetDocumentsRequest + - GraphMemoryBankDef - HealthInfo - ImageMedia - InferenceStep - InsertDocumentsRequest + - KeyValueMemoryBankDef + - KeywordMemoryBankDef - LogEventRequest - LogSeverity - LoraFinetuningConfig - - MemoryBank - MemoryBankDocument - - MemoryBankSpec - - MemoryBankType - MemoryRetrievalStep - MemoryToolDefinition - MetricEvent - - Model - - ModelServingSpec + - ModelDef - OptimizerConfig - PhotogenToolDefinition - PostTrainingJob @@ -4438,6 +4375,9 @@ x-tagGroups: - QueryDocumentsRequest - QueryDocumentsResponse - RLHFAlgorithm + - RegisterMemoryBankRequest + - RegisterModelRequest + - RegisterShieldRequest - RestAPIExecutionConfig - RestAPIMethod - RewardScoreRequest @@ -4453,7 +4393,7 @@ x-tagGroups: - SearchToolDefinition - Session - ShieldCallStep - - ShieldSpec + - ShieldDef - SpanEndPayload - SpanStartPayload - SpanStatus @@ -4483,5 +4423,6 @@ x-tagGroups: - UnstructuredLogEvent - UpdateDocumentsRequest - UserMessage + - VectorMemoryBankDef - ViolationLevel - WolframAlphaToolDefinition From 4540d8bd87e19a1365bba7cd44c27e3b584ae9b9 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 8 Oct 2024 15:45:50 -0700 Subject: [PATCH 62/69] move codeshield into an independent safety provider --- .../meta_reference/codeshield/__init__.py | 15 +++++ .../meta_reference/codeshield/code_scanner.py | 58 +++++++++++++++++++ .../impls/meta_reference/codeshield/config.py | 11 ++++ .../safety/{shields => }/base.py | 7 --- .../safety/{shields => }/llama_guard.py | 0 .../safety/{shields => }/prompt_guard.py | 0 .../impls/meta_reference/safety/safety.py | 20 ++----- .../meta_reference/safety/shields/__init__.py | 33 
----------- .../safety/shields/code_scanner.py | 27 --------- llama_stack/providers/registry/safety.py | 11 +++- 10 files changed, 98 insertions(+), 84 deletions(-) create mode 100644 llama_stack/providers/impls/meta_reference/codeshield/__init__.py create mode 100644 llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py create mode 100644 llama_stack/providers/impls/meta_reference/codeshield/config.py rename llama_stack/providers/impls/meta_reference/safety/{shields => }/base.py (88%) rename llama_stack/providers/impls/meta_reference/safety/{shields => }/llama_guard.py (100%) rename llama_stack/providers/impls/meta_reference/safety/{shields => }/prompt_guard.py (100%) delete mode 100644 llama_stack/providers/impls/meta_reference/safety/shields/__init__.py delete mode 100644 llama_stack/providers/impls/meta_reference/safety/shields/code_scanner.py diff --git a/llama_stack/providers/impls/meta_reference/codeshield/__init__.py b/llama_stack/providers/impls/meta_reference/codeshield/__init__.py new file mode 100644 index 000000000..665c5c637 --- /dev/null +++ b/llama_stack/providers/impls/meta_reference/codeshield/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .config import CodeShieldConfig + + +async def get_provider_impl(config: CodeShieldConfig, deps): + from .code_scanner import MetaReferenceCodeScannerSafetyImpl + + impl = MetaReferenceCodeScannerSafetyImpl(config, deps) + await impl.initialize() + return impl diff --git a/llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py b/llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py new file mode 100644 index 000000000..37ea96270 --- /dev/null +++ b/llama_stack/providers/impls/meta_reference/codeshield/code_scanner.py @@ -0,0 +1,58 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from typing import Any, Dict, List + +from llama_models.llama3.api.datatypes import interleaved_text_media_as_str, Message +from termcolor import cprint + +from .config import CodeScannerConfig + +from llama_stack.apis.safety import * # noqa: F403 + + +class MetaReferenceCodeScannerSafetyImpl(Safety): + def __init__(self, config: CodeScannerConfig, deps) -> None: + self.config = config + + async def initialize(self) -> None: + pass + + async def shutdown(self) -> None: + pass + + async def register_shield(self, shield: ShieldDef) -> None: + if shield.type != ShieldType.code_scanner.value: + raise ValueError(f"Unsupported safety shield type: {shield.type}") + + async def run_shield( + self, + shield_type: str, + messages: List[Message], + params: Dict[str, Any] = None, + ) -> RunShieldResponse: + shield_def = await self.shield_store.get_shield(shield_type) + if not shield_def: + raise ValueError(f"Unknown shield {shield_type}") + + from codeshield.cs import CodeShield + + text = "\n".join([interleaved_text_media_as_str(m.content) for m in messages]) + cprint(f"Running CodeScannerShield on {text[50:]}", color="magenta") + result = await CodeShield.scan_code(text) + + violation = None + if result.is_insecure: + violation = SafetyViolation( + violation_level=(ViolationLevel.ERROR), + user_message="Sorry, I found security concerns in the code.", + metadata={ + "violation_type": ",".join( + [issue.pattern_id for issue in result.issues_found] + ) + }, + ) + return RunShieldResponse(violation=violation) diff --git a/llama_stack/providers/impls/meta_reference/codeshield/config.py b/llama_stack/providers/impls/meta_reference/codeshield/config.py new file mode 100644 index 000000000..583c2c95f --- /dev/null +++ b/llama_stack/providers/impls/meta_reference/codeshield/config.py @@ -0,0 +1,11 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from pydantic import BaseModel + + +class CodeShieldConfig(BaseModel): + pass diff --git a/llama_stack/providers/impls/meta_reference/safety/shields/base.py b/llama_stack/providers/impls/meta_reference/safety/base.py similarity index 88% rename from llama_stack/providers/impls/meta_reference/safety/shields/base.py rename to llama_stack/providers/impls/meta_reference/safety/base.py index 6a03d1e61..3861a7c4a 100644 --- a/llama_stack/providers/impls/meta_reference/safety/shields/base.py +++ b/llama_stack/providers/impls/meta_reference/safety/base.py @@ -44,7 +44,6 @@ def message_content_as_str(message: Message) -> str: return interleaved_text_media_as_str(message.content) -# For shields that operate on simple strings class TextShield(ShieldBase): def convert_messages_to_text(self, messages: List[Message]) -> str: return "\n".join([message_content_as_str(m) for m in messages]) @@ -56,9 +55,3 @@ class TextShield(ShieldBase): @abstractmethod async def run_impl(self, text: str) -> ShieldResponse: raise NotImplementedError() - - -class DummyShield(TextShield): - async def run_impl(self, text: str) -> ShieldResponse: - # Dummy return LOW to test e2e - return ShieldResponse(is_violation=False) diff --git a/llama_stack/providers/impls/meta_reference/safety/shields/llama_guard.py b/llama_stack/providers/impls/meta_reference/safety/llama_guard.py similarity index 100% rename from llama_stack/providers/impls/meta_reference/safety/shields/llama_guard.py rename to llama_stack/providers/impls/meta_reference/safety/llama_guard.py diff --git a/llama_stack/providers/impls/meta_reference/safety/shields/prompt_guard.py b/llama_stack/providers/impls/meta_reference/safety/prompt_guard.py similarity index 100% rename from llama_stack/providers/impls/meta_reference/safety/shields/prompt_guard.py rename to llama_stack/providers/impls/meta_reference/safety/prompt_guard.py diff --git a/llama_stack/providers/impls/meta_reference/safety/safety.py b/llama_stack/providers/impls/meta_reference/safety/safety.py index 5d6747f9f..7457bf246 100644 --- a/llama_stack/providers/impls/meta_reference/safety/safety.py +++ b/llama_stack/providers/impls/meta_reference/safety/safety.py @@ -12,19 +12,11 @@ from llama_stack.apis.safety import * # noqa: F403 from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_stack.distribution.datatypes import Api -from llama_stack.providers.impls.meta_reference.safety.shields.base import ( - OnViolationAction, -) - +from .base import OnViolationAction, ShieldBase from .config import SafetyConfig +from .llama_guard import LlamaGuardShield +from .prompt_guard import InjectionShield, JailbreakShield, PromptGuardShield -from .shields import ( - CodeScannerShield, - InjectionShield, - JailbreakShield, - LlamaGuardShield, - ShieldBase, -) PROMPT_GUARD_MODEL = "Prompt-Guard-86M" @@ -34,7 +26,7 @@ class MetaReferenceSafetyImpl(Safety): self.config = config self.inference_api = deps[Api.inference] - self.available_shields = [ShieldType.code_scanner.value] + self.available_shields = [] if config.llama_guard_shield: self.available_shields.append(ShieldType.llama_guard.value) if config.enable_prompt_guard: @@ -42,8 +34,6 @@ class MetaReferenceSafetyImpl(Safety): async def initialize(self) -> None: if self.config.enable_prompt_guard: - from .shields import PromptGuardShield - model_dir = model_local_dir(PROMPT_GUARD_MODEL) _ = PromptGuardShield.instance(model_dir) @@ -107,7 +97,5 @@ class MetaReferenceSafetyImpl(Safety): return JailbreakShield.instance(model_dir) else: raise 
ValueError(f"Unknown prompt guard type: {subtype}") - elif shield.type == ShieldType.code_scanner.value: - return CodeScannerShield.instance() else: raise ValueError(f"Unknown shield type: {shield.type}") diff --git a/llama_stack/providers/impls/meta_reference/safety/shields/__init__.py b/llama_stack/providers/impls/meta_reference/safety/shields/__init__.py deleted file mode 100644 index 9caf10883..000000000 --- a/llama_stack/providers/impls/meta_reference/safety/shields/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -# supress warnings and spew of logs from hugging face -import transformers - -from .base import ( # noqa: F401 - DummyShield, - OnViolationAction, - ShieldBase, - ShieldResponse, - TextShield, -) -from .code_scanner import CodeScannerShield # noqa: F401 -from .llama_guard import LlamaGuardShield # noqa: F401 -from .prompt_guard import ( # noqa: F401 - InjectionShield, - JailbreakShield, - PromptGuardShield, -) - -transformers.logging.set_verbosity_error() - -import os - -os.environ["TOKENIZERS_PARALLELISM"] = "false" - -import warnings - -warnings.filterwarnings("ignore") diff --git a/llama_stack/providers/impls/meta_reference/safety/shields/code_scanner.py b/llama_stack/providers/impls/meta_reference/safety/shields/code_scanner.py deleted file mode 100644 index 9b043ff04..000000000 --- a/llama_stack/providers/impls/meta_reference/safety/shields/code_scanner.py +++ /dev/null @@ -1,27 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. 
- -from termcolor import cprint - -from .base import ShieldResponse, TextShield - - -class CodeScannerShield(TextShield): - async def run_impl(self, text: str) -> ShieldResponse: - from codeshield.cs import CodeShield - - cprint(f"Running CodeScannerShield on {text[50:]}", color="magenta") - result = await CodeShield.scan_code(text) - if result.is_insecure: - return ShieldResponse( - is_violation=True, - violation_type=",".join( - [issue.pattern_id for issue in result.issues_found] - ), - violation_return_message="Sorry, I found security concerns in the code.", - ) - else: - return ShieldResponse(is_violation=False) diff --git a/llama_stack/providers/registry/safety.py b/llama_stack/providers/registry/safety.py index 58307be11..3fa62479a 100644 --- a/llama_stack/providers/registry/safety.py +++ b/llama_stack/providers/registry/safety.py @@ -21,7 +21,6 @@ def available_providers() -> List[ProviderSpec]: api=Api.safety, provider_type="meta-reference", pip_packages=[ - "codeshield", "transformers", "torch --index-url https://download.pytorch.org/whl/cpu", ], @@ -61,4 +60,14 @@ def available_providers() -> List[ProviderSpec]: provider_data_validator="llama_stack.providers.adapters.safety.together.TogetherProviderDataValidator", ), ), + InlineProviderSpec( + api=Api.safety, + provider_type="meta-reference/codeshield", + pip_packages=[ + "codeshield", + ], + module="llama_stack.providers.impls.meta_reference.codeshield", + config_class="llama_stack.providers.impls.meta_reference.codeshield.CodeShieldConfig", + api_dependencies=[], + ), ] From 216e7eb4d5f47290a94f3b97fd3eac7439aab4dc Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 8 Oct 2024 16:53:05 -0700 Subject: [PATCH 63/69] Move `async with SEMAPHORE` inside the async methods --- .../meta_reference/inference/inference.py | 269 +++++++++--------- 1 file changed, 138 insertions(+), 131 deletions(-) diff --git a/llama_stack/providers/impls/meta_reference/inference/inference.py b/llama_stack/providers/impls/meta_reference/inference/inference.py index 9e31f0834..43a131647 100644 --- a/llama_stack/providers/impls/meta_reference/inference/inference.py +++ b/llama_stack/providers/impls/meta_reference/inference/inference.py @@ -85,112 +85,36 @@ class MetaReferenceInferenceImpl(Inference): if SEMAPHORE.locked(): raise RuntimeError("Only one concurrent request is supported") - async with SEMAPHORE: - if request.stream: - return self._stream_chat_completion(request) - else: - return self._nonstream_chat_completion(request) + if request.stream: + return self._stream_chat_completion(request) + else: + return self._nonstream_chat_completion(request) async def _nonstream_chat_completion( self, request: ChatCompletionRequest ) -> ChatCompletionResponse: - messages = chat_completion_request_to_messages(request) + async with SEMAPHORE: + messages = chat_completion_request_to_messages(request) - tokens = [] - logprobs = [] - stop_reason = None + tokens = [] + logprobs = [] + stop_reason = None - for token_result in self.generator.chat_completion( - messages=messages, - temperature=request.sampling_params.temperature, - top_p=request.sampling_params.top_p, - max_gen_len=request.sampling_params.max_tokens, - logprobs=request.logprobs, - tool_prompt_format=request.tool_prompt_format, - ): - tokens.append(token_result.token) + for token_result in self.generator.chat_completion( + messages=messages, + temperature=request.sampling_params.temperature, + top_p=request.sampling_params.top_p, + max_gen_len=request.sampling_params.max_tokens, + 
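# Note (explanatory, not part of the diff): the important change in this patch
# is that `async with SEMAPHORE` now wraps the bodies of
# `_nonstream_chat_completion` and `_stream_chat_completion` themselves.
# Previously the lock was taken in `chat_completion`, which merely creates the
# coroutine / async generator and returns it; the body does not run until the
# caller awaits or iterates it, so the semaphore was released before any
# generation actually happened. Holding it inside the bodies keeps the
# single-request guarantee for the full duration of generation, while the
# quick `SEMAPHORE.locked()` check in `chat_completion` still fails fast.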
logprobs=request.logprobs, + tool_prompt_format=request.tool_prompt_format, + ): + tokens.append(token_result.token) - if token_result.text == "<|eot_id|>": - stop_reason = StopReason.end_of_turn - elif token_result.text == "<|eom_id|>": - stop_reason = StopReason.end_of_message + if token_result.text == "<|eot_id|>": + stop_reason = StopReason.end_of_turn + elif token_result.text == "<|eom_id|>": + stop_reason = StopReason.end_of_message - if request.logprobs: - assert len(token_result.logprobs) == 1 - - logprobs.append( - TokenLogProbs( - logprobs_by_token={token_result.text: token_result.logprobs[0]} - ) - ) - - if stop_reason is None: - stop_reason = StopReason.out_of_tokens - - message = self.generator.formatter.decode_assistant_message(tokens, stop_reason) - return ChatCompletionResponse( - completion_message=message, - logprobs=logprobs if request.logprobs else None, - ) - - async def _stream_chat_completion( - self, request: ChatCompletionRequest - ) -> AsyncGenerator: - messages = chat_completion_request_to_messages(request) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.start, - delta="", - ) - ) - - tokens = [] - logprobs = [] - stop_reason = None - ipython = False - - for token_result in self.generator.chat_completion( - messages=messages, - temperature=request.sampling_params.temperature, - top_p=request.sampling_params.top_p, - max_gen_len=request.sampling_params.max_tokens, - logprobs=request.logprobs, - tool_prompt_format=request.tool_prompt_format, - ): - tokens.append(token_result.token) - - if not ipython and token_result.text.startswith("<|python_tag|>"): - ipython = True - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content="", - parse_status=ToolCallParseStatus.started, - ), - ) - ) - continue - - if token_result.text == "<|eot_id|>": - stop_reason = StopReason.end_of_turn - text = "" - elif token_result.text == "<|eom_id|>": - stop_reason = StopReason.end_of_message - text = "" - else: - text = token_result.text - - if ipython: - delta = ToolCallDelta( - content=text, - parse_status=ToolCallParseStatus.in_progress, - ) - else: - delta = text - - if stop_reason is None: if request.logprobs: assert len(token_result.logprobs) == 1 @@ -201,49 +125,132 @@ class MetaReferenceInferenceImpl(Inference): } ) ) + + if stop_reason is None: + stop_reason = StopReason.out_of_tokens + + message = self.generator.formatter.decode_assistant_message( + tokens, stop_reason + ) + return ChatCompletionResponse( + completion_message=message, + logprobs=logprobs if request.logprobs else None, + ) + + async def _stream_chat_completion( + self, request: ChatCompletionRequest + ) -> AsyncGenerator: + async with SEMAPHORE: + messages = chat_completion_request_to_messages(request) + + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.start, + delta="", + ) + ) + + tokens = [] + logprobs = [] + stop_reason = None + ipython = False + + for token_result in self.generator.chat_completion( + messages=messages, + temperature=request.sampling_params.temperature, + top_p=request.sampling_params.top_p, + max_gen_len=request.sampling_params.max_tokens, + logprobs=request.logprobs, + tool_prompt_format=request.tool_prompt_format, + ): + tokens.append(token_result.token) + + if not ipython and token_result.text.startswith("<|python_tag|>"): + 
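# Descriptive note (not part of the diff): the `<|python_tag|>` special token
# is how the model signals the start of a built-in / tool call. Once it is
# seen, the stream stops emitting plain text deltas and instead emits
# `ToolCallDelta` chunks (`started`, then `in_progress`); after generation
# ends, the buffered tokens are decoded and any parsed tool calls are emitted
# with `ToolCallParseStatus.success`, or a `failure` delta if nothing
# parseable was produced.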
ipython = True + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + content="", + parse_status=ToolCallParseStatus.started, + ), + ) + ) + continue + + if token_result.text == "<|eot_id|>": + stop_reason = StopReason.end_of_turn + text = "" + elif token_result.text == "<|eom_id|>": + stop_reason = StopReason.end_of_message + text = "" + else: + text = token_result.text + + if ipython: + delta = ToolCallDelta( + content=text, + parse_status=ToolCallParseStatus.in_progress, + ) + else: + delta = text + + if stop_reason is None: + if request.logprobs: + assert len(token_result.logprobs) == 1 + + logprobs.append( + TokenLogProbs( + logprobs_by_token={ + token_result.text: token_result.logprobs[0] + } + ) + ) + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=delta, + stop_reason=stop_reason, + logprobs=logprobs if request.logprobs else None, + ) + ) + + if stop_reason is None: + stop_reason = StopReason.out_of_tokens + + message = self.generator.formatter.decode_assistant_message( + tokens, stop_reason + ) + + parsed_tool_calls = len(message.tool_calls) > 0 + if ipython and not parsed_tool_calls: yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( event_type=ChatCompletionResponseEventType.progress, - delta=delta, + delta=ToolCallDelta( + content="", + parse_status=ToolCallParseStatus.failure, + ), stop_reason=stop_reason, - logprobs=logprobs if request.logprobs else None, ) ) - if stop_reason is None: - stop_reason = StopReason.out_of_tokens + for tool_call in message.tool_calls: + yield ChatCompletionResponseStreamChunk( + event=ChatCompletionResponseEvent( + event_type=ChatCompletionResponseEventType.progress, + delta=ToolCallDelta( + content=tool_call, + parse_status=ToolCallParseStatus.success, + ), + stop_reason=stop_reason, + ) + ) - message = self.generator.formatter.decode_assistant_message(tokens, stop_reason) - - parsed_tool_calls = len(message.tool_calls) > 0 - if ipython and not parsed_tool_calls: yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content="", - parse_status=ToolCallParseStatus.failure, - ), + event_type=ChatCompletionResponseEventType.complete, + delta="", stop_reason=stop_reason, ) ) - - for tool_call in message.tool_calls: - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.progress, - delta=ToolCallDelta( - content=tool_call, - parse_status=ToolCallParseStatus.success, - ), - stop_reason=stop_reason, - ) - ) - - yield ChatCompletionResponseStreamChunk( - event=ChatCompletionResponseEvent( - event_type=ChatCompletionResponseEventType.complete, - delta="", - stop_reason=stop_reason, - ) - ) From 8eee5b9adc3e55e4f2befcbf19e10368e3629f5c Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 8 Oct 2024 17:03:31 -0700 Subject: [PATCH 64/69] Fix server conditional awaiting on coroutines --- llama_stack/distribution/server/server.py | 13 ++++++++----- .../impls/meta_reference/inference/inference.py | 1 + 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index 5c1a7806d..dd499db6b 100644 --- a/llama_stack/distribution/server/server.py +++ 
b/llama_stack/distribution/server/server.py @@ -193,6 +193,12 @@ def is_streaming_request(func_name: str, request: Request, **kwargs): return kwargs.get("stream", False) +async def maybe_await(value): + if inspect.iscoroutine(value): + return await value + return value + + async def sse_generator(event_gen): try: async for item in event_gen: @@ -228,11 +234,8 @@ def create_dynamic_typed_route(func: Any, method: str): sse_generator(func(**kwargs)), media_type="text/event-stream" ) else: - return ( - await func(**kwargs) - if asyncio.iscoroutinefunction(func) - else func(**kwargs) - ) + value = func(**kwargs) + return await maybe_await(value) except Exception as e: traceback.print_exception(e) raise translate_exception(e) from e diff --git a/llama_stack/providers/impls/meta_reference/inference/inference.py b/llama_stack/providers/impls/meta_reference/inference/inference.py index 43a131647..bda5e54c1 100644 --- a/llama_stack/providers/impls/meta_reference/inference/inference.py +++ b/llama_stack/providers/impls/meta_reference/inference/inference.py @@ -34,6 +34,7 @@ class MetaReferenceInferenceImpl(Inference): # verify that the checkpoint actually is for this model lol async def initialize(self) -> None: + print(f"Loading model `{self.model.descriptor()}`") self.generator = LlamaModelParallelGenerator(self.config) self.generator.start() From f40cd623060b5d22c009ff261a8da5173b3c0348 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 8 Oct 2024 16:57:52 -0700 Subject: [PATCH 65/69] Test fixes --- llama_stack/apis/inference/client.py | 2 +- llama_stack/providers/tests/inference/test_inference.py | 1 - llama_stack/providers/tests/memory/test_memory.py | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/llama_stack/apis/inference/client.py b/llama_stack/apis/inference/client.py index c7b865ebf..79d2cc02c 100644 --- a/llama_stack/apis/inference/client.py +++ b/llama_stack/apis/inference/client.py @@ -84,7 +84,7 @@ class InferenceClient(Inference): response.raise_for_status() j = response.json() - yield ChatCompletionResponse(**j) + return ChatCompletionResponse(**j) async def _stream_chat_completion( self, request: ChatCompletionRequest diff --git a/llama_stack/providers/tests/inference/test_inference.py b/llama_stack/providers/tests/inference/test_inference.py index b864c2ef4..f52de0df1 100644 --- a/llama_stack/providers/tests/inference/test_inference.py +++ b/llama_stack/providers/tests/inference/test_inference.py @@ -68,7 +68,6 @@ async def inference_settings(request): ModelDef( identifier=model, llama_model=model, - provider_id="", ) ], ) diff --git a/llama_stack/providers/tests/memory/test_memory.py b/llama_stack/providers/tests/memory/test_memory.py index 4351ae699..70f8ba4aa 100644 --- a/llama_stack/providers/tests/memory/test_memory.py +++ b/llama_stack/providers/tests/memory/test_memory.py @@ -67,7 +67,6 @@ def sample_documents(): async def register_memory_bank(memory_impl: Memory): bank = VectorMemoryBankDef( identifier="test_bank", - provider_id="", embedding_model="all-MiniLM-L6-v2", chunk_size_in_tokens=512, overlap_size_in_tokens=64, From 924b1fba095d3d5c551b655fd14965cdab2a17df Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 8 Oct 2024 17:26:26 -0700 Subject: [PATCH 66/69] minor --- llama_stack/distribution/server/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index dd499db6b..9f362e023 100644 --- 
a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -285,7 +285,7 @@ def main( else: apis_to_serve = set(impls.keys()) - apis_to_serve.add(Api.inspect) + apis_to_serve.add("inspect") for api_str in apis_to_serve: api = Api(api_str) From a86f3ae07d69df72688affe9589321a5942a50ef Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 8 Oct 2024 17:41:06 -0700 Subject: [PATCH 67/69] Update run.yaml --- llama_stack/distribution/configure.py | 2 +- tests/examples/local-run.yaml | 78 +++++++++++++++++---------- 2 files changed, 51 insertions(+), 29 deletions(-) diff --git a/llama_stack/distribution/configure.py b/llama_stack/distribution/configure.py index f533422fe..2811d4142 100644 --- a/llama_stack/distribution/configure.py +++ b/llama_stack/distribution/configure.py @@ -335,7 +335,7 @@ def upgrade_from_routing_table_to_registry( provider_map = config_dict.get("api_providers", config_dict.get("provider_map", {})) if provider_map: for api_str, provider in provider_map.items(): - if isinstance(provider, dict): + if isinstance(provider, dict) and "provider_type" in provider: providers_by_api[api_str] = [ Provider( provider_id=f"{provider['provider_type']}", diff --git a/tests/examples/local-run.yaml b/tests/examples/local-run.yaml index e4319750a..108c805f8 100644 --- a/tests/examples/local-run.yaml +++ b/tests/examples/local-run.yaml @@ -1,8 +1,10 @@ -built_at: '2024-09-23T00:54:40.551416' +Upgrading config... +version: '2' +built_at: '2024-10-08T17:40:45.325529' image_name: local docker_image: null conda_env: local -apis_to_serve: +apis: - shields - agents - models @@ -10,38 +12,19 @@ apis_to_serve: - memory_banks - inference - safety -api_providers: +providers: inference: - providers: - - meta-reference - safety: - providers: - - meta-reference - agents: + - provider_id: meta-reference provider_type: meta-reference - config: - persistence_store: - namespace: null - type: sqlite - db_path: /home/xiyan/.llama/runtime/kvstore.db - memory: - providers: - - meta-reference - telemetry: - provider_type: meta-reference - config: {} -routing_table: - inference: - - provider_type: meta-reference config: model: Llama3.1-8B-Instruct quantization: null torch_seed: null max_seq_len: 4096 max_batch_size: 1 - routing_key: Llama3.1-8B-Instruct safety: - - provider_type: meta-reference + - provider_id: meta-reference + provider_type: meta-reference config: llama_guard_shield: model: Llama-Guard-3-1B @@ -50,8 +33,47 @@ routing_table: disable_output_check: false prompt_guard_shield: model: Prompt-Guard-86M - routing_key: ["llama_guard", "code_scanner_guard", "injection_shield", "jailbreak_shield"] memory: - - provider_type: meta-reference + - provider_id: meta-reference + provider_type: meta-reference config: {} - routing_key: vector + agents: + - provider_id: meta-reference + provider_type: meta-reference + config: + persistence_store: + namespace: null + type: sqlite + db_path: /home/xiyan/.llama/runtime/kvstore.db + telemetry: + - provider_id: meta-reference + provider_type: meta-reference + config: {} +models: +- identifier: Llama3.1-8B-Instruct + llama_model: Llama3.1-8B-Instruct + provider_id: meta-reference +shields: +- identifier: llama_guard + type: llama_guard + provider_id: meta-reference + params: {} +- identifier: code_scanner_guard + type: llama_guard + provider_id: meta-reference + params: {} +- identifier: injection_shield + type: llama_guard + provider_id: meta-reference + params: {} +- identifier: jailbreak_shield + type: llama_guard + provider_id: 
meta-reference + params: {} +memory_banks: +- identifier: vector + provider_id: meta-reference + type: vector + embedding_model: all-MiniLM-L6-v2 + chunk_size_in_tokens: 512 + overlap_size_in_tokens: null From 24c61403b74ce913a2eb607cdc903360b9dc7877 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 8 Oct 2024 17:43:25 -0700 Subject: [PATCH 68/69] Fixes --- .../docker/llamastack-local-cpu/run.yaml | 75 +++++++++++------- .../docker/llamastack-local-gpu/run.yaml | 77 ++++++++++++------- tests/examples/local-run.yaml | 1 - 3 files changed, 97 insertions(+), 56 deletions(-) diff --git a/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml b/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml index 62b615a50..1efa28cdc 100644 --- a/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml +++ b/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml @@ -1,8 +1,9 @@ -built_at: '2024-09-30T09:04:30.533391' +version: '2' +built_at: '2024-10-08T17:42:07.505267' image_name: local-cpu docker_image: local-cpu conda_env: null -apis_to_serve: +apis: - agents - inference - models @@ -10,40 +11,60 @@ apis_to_serve: - safety - shields - memory_banks -api_providers: +providers: inference: - providers: - - remote::ollama + - provider_id: remote::ollama + provider_type: remote::ollama + config: + host: localhost + port: 6000 safety: - providers: - - meta-reference + - provider_id: meta-reference + provider_type: meta-reference + config: + llama_guard_shield: null + prompt_guard_shield: null + memory: + - provider_id: meta-reference + provider_type: meta-reference + config: {} agents: + - provider_id: meta-reference provider_type: meta-reference config: persistence_store: namespace: null type: sqlite db_path: ~/.llama/runtime/kvstore.db - memory: - providers: - - meta-reference telemetry: + - provider_id: meta-reference provider_type: meta-reference config: {} -routing_table: - inference: - - provider_type: remote::ollama - config: - host: localhost - port: 6000 - routing_key: Llama3.1-8B-Instruct - safety: - - provider_type: meta-reference - config: - llama_guard_shield: null - prompt_guard_shield: null - routing_key: ["llama_guard", "code_scanner_guard", "injection_shield", "jailbreak_shield"] - memory: - - provider_type: meta-reference - config: {} - routing_key: vector +models: +- identifier: Llama3.1-8B-Instruct + llama_model: Llama3.1-8B-Instruct + provider_id: remote::ollama +shields: +- identifier: llama_guard + type: llama_guard + provider_id: meta-reference + params: {} +- identifier: code_scanner_guard + type: llama_guard + provider_id: meta-reference + params: {} +- identifier: injection_shield + type: llama_guard + provider_id: meta-reference + params: {} +- identifier: jailbreak_shield + type: llama_guard + provider_id: meta-reference + params: {} +memory_banks: +- identifier: vector + provider_id: meta-reference + type: vector + embedding_model: all-MiniLM-L6-v2 + chunk_size_in_tokens: 512 + overlap_size_in_tokens: null diff --git a/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml b/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml index 0004b1780..949e78eda 100644 --- a/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml +++ b/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml @@ -1,8 +1,9 @@ -built_at: '2024-09-30T09:00:56.693751' +version: '2' +built_at: '2024-10-08T17:42:33.690666' image_name: local-gpu docker_image: local-gpu 
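# Illustrative summary (comment only, not part of the generated file): the
# version-2 layout used by these run.yaml files replaces the old
# `api_providers` + `routing_table` pair with:
#
#   providers:     # per-API lists of {provider_id, provider_type, config}
#   models:        # model registry entries bound to a provider_id
#   shields:       # shield registry entries
#   memory_banks:  # memory bank registry entries
#
# presumably the same shape that `upgrade_from_routing_table_to_registry` in
# configure.py produces when it upgrades an old config.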
conda_env: null -apis_to_serve: +apis: - memory - inference - agents @@ -10,43 +11,63 @@ apis_to_serve: - safety - models - memory_banks -api_providers: +providers: inference: - providers: - - meta-reference - safety: - providers: - - meta-reference - agents: + - provider_id: meta-reference provider_type: meta-reference - config: - persistence_store: - namespace: null - type: sqlite - db_path: ~/.llama/runtime/kvstore.db - memory: - providers: - - meta-reference - telemetry: - provider_type: meta-reference - config: {} -routing_table: - inference: - - provider_type: meta-reference config: model: Llama3.1-8B-Instruct quantization: null torch_seed: null max_seq_len: 4096 max_batch_size: 1 - routing_key: Llama3.1-8B-Instruct safety: - - provider_type: meta-reference + - provider_id: meta-reference + provider_type: meta-reference config: llama_guard_shield: null prompt_guard_shield: null - routing_key: ["llama_guard", "code_scanner_guard", "injection_shield", "jailbreak_shield"] memory: - - provider_type: meta-reference + - provider_id: meta-reference + provider_type: meta-reference config: {} - routing_key: vector + agents: + - provider_id: meta-reference + provider_type: meta-reference + config: + persistence_store: + namespace: null + type: sqlite + db_path: ~/.llama/runtime/kvstore.db + telemetry: + - provider_id: meta-reference + provider_type: meta-reference + config: {} +models: +- identifier: Llama3.1-8B-Instruct + llama_model: Llama3.1-8B-Instruct + provider_id: meta-reference +shields: +- identifier: llama_guard + type: llama_guard + provider_id: meta-reference + params: {} +- identifier: code_scanner_guard + type: llama_guard + provider_id: meta-reference + params: {} +- identifier: injection_shield + type: llama_guard + provider_id: meta-reference + params: {} +- identifier: jailbreak_shield + type: llama_guard + provider_id: meta-reference + params: {} +memory_banks: +- identifier: vector + provider_id: meta-reference + type: vector + embedding_model: all-MiniLM-L6-v2 + chunk_size_in_tokens: 512 + overlap_size_in_tokens: null diff --git a/tests/examples/local-run.yaml b/tests/examples/local-run.yaml index 108c805f8..365cbb7c6 100644 --- a/tests/examples/local-run.yaml +++ b/tests/examples/local-run.yaml @@ -1,4 +1,3 @@ -Upgrading config... 
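# Note (not part of the diff): the stray "Upgrading config..." first line
# removed here looks like a console status message that was captured into the
# YAML when the config was auto-upgraded; dropping it makes the file valid
# YAML again.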
version: '2' built_at: '2024-10-08T17:40:45.325529' image_name: local From 73a0a34e392d0ce6a5de8f935bbeb023a89a99a4 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 8 Oct 2024 17:47:03 -0700 Subject: [PATCH 69/69] Kill non-llama guard shields --- .../templates/docker/llamastack-local-cpu/run.yaml | 12 ------------ .../templates/docker/llamastack-local-gpu/run.yaml | 12 ------------ tests/examples/local-run.yaml | 12 ------------ 3 files changed, 36 deletions(-) diff --git a/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml b/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml index 1efa28cdc..3a7514147 100644 --- a/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml +++ b/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml @@ -49,18 +49,6 @@ shields: type: llama_guard provider_id: meta-reference params: {} -- identifier: code_scanner_guard - type: llama_guard - provider_id: meta-reference - params: {} -- identifier: injection_shield - type: llama_guard - provider_id: meta-reference - params: {} -- identifier: jailbreak_shield - type: llama_guard - provider_id: meta-reference - params: {} memory_banks: - identifier: vector provider_id: meta-reference diff --git a/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml b/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml index 949e78eda..3b9cd6866 100644 --- a/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml +++ b/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml @@ -52,18 +52,6 @@ shields: type: llama_guard provider_id: meta-reference params: {} -- identifier: code_scanner_guard - type: llama_guard - provider_id: meta-reference - params: {} -- identifier: injection_shield - type: llama_guard - provider_id: meta-reference - params: {} -- identifier: jailbreak_shield - type: llama_guard - provider_id: meta-reference - params: {} memory_banks: - identifier: vector provider_id: meta-reference diff --git a/tests/examples/local-run.yaml b/tests/examples/local-run.yaml index 365cbb7c6..06006bcb5 100644 --- a/tests/examples/local-run.yaml +++ b/tests/examples/local-run.yaml @@ -57,18 +57,6 @@ shields: type: llama_guard provider_id: meta-reference params: {} -- identifier: code_scanner_guard - type: llama_guard - provider_id: meta-reference - params: {} -- identifier: injection_shield - type: llama_guard - provider_id: meta-reference - params: {} -- identifier: jailbreak_shield - type: llama_guard - provider_id: meta-reference - params: {} memory_banks: - identifier: vector provider_id: meta-reference
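Closing note (illustrative, not part of the patches): after the earlier patch in
this series split code scanning into its own `meta-reference/codeshield`
provider and this final patch removed the extra `llama_guard`-typed shield
aliases from the example configs, a run config that still wants code scanning
would presumably register a shield of type `code_scanner` against that
provider, along the lines of:

    shields:
    - identifier: code_scanner
      type: code_scanner
      provider_id: codeshield   # hypothetical id for a meta-reference/codeshield provider entry
      params: {}

The `code_scanner` type matches the check in the new provider's
`register_shield`; the `codeshield` provider_id and the exact wiring are
assumptions, not something these patches add.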