From 2128e61da2d3f660deee7e8c2fdf454dc0168a2d Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Fri, 11 Oct 2024 11:47:57 -0400 Subject: [PATCH 01/40] Fix incorrect completion() signature for Databricks provider (#236) --- docs/getting_started.md | 2 +- .../providers/adapters/inference/databricks/__init__.py | 3 ++- .../providers/adapters/inference/databricks/config.py | 3 +-- .../adapters/inference/databricks/databricks.py | 9 ++++++++- .../providers/impls/meta_reference/safety/llama_guard.py | 2 +- llama_stack/providers/impls/vllm/__init__.py | 6 ++++++ 6 files changed, 19 insertions(+), 6 deletions(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index 32f4d2d15..6c8c902c0 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -73,7 +73,7 @@ docker run -it -p 5000:5000 -v ~/.llama:/root/.llama --gpus=all llamastack-local ``` > [!NOTE] -> `~/.llama` should be the path containing downloaded weights of Llama models. +> `~/.llama` should be the path containing downloaded weights of Llama models. #### Via conda diff --git a/llama_stack/providers/adapters/inference/databricks/__init__.py b/llama_stack/providers/adapters/inference/databricks/__init__.py index 097579d25..ca2a0a103 100644 --- a/llama_stack/providers/adapters/inference/databricks/__init__.py +++ b/llama_stack/providers/adapters/inference/databricks/__init__.py @@ -7,10 +7,11 @@ from .config import DatabricksImplConfig from .databricks import DatabricksInferenceAdapter + async def get_adapter_impl(config: DatabricksImplConfig, _deps): assert isinstance( config, DatabricksImplConfig ), f"Unexpected config type: {type(config)}" impl = DatabricksInferenceAdapter(config) await impl.initialize() - return impl \ No newline at end of file + return impl diff --git a/llama_stack/providers/adapters/inference/databricks/config.py b/llama_stack/providers/adapters/inference/databricks/config.py index 927bb474c..ae2b056ea 100644 --- a/llama_stack/providers/adapters/inference/databricks/config.py +++ b/llama_stack/providers/adapters/inference/databricks/config.py @@ -4,7 +4,6 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from typing import Optional from llama_models.schema_utils import json_schema_type from pydantic import BaseModel, Field @@ -19,4 +18,4 @@ class DatabricksImplConfig(BaseModel): api_token: str = Field( default=None, description="The Databricks API token", - ) \ No newline at end of file + ) diff --git a/llama_stack/providers/adapters/inference/databricks/databricks.py b/llama_stack/providers/adapters/inference/databricks/databricks.py index 2d7427253..7e8263dbf 100644 --- a/llama_stack/providers/adapters/inference/databricks/databricks.py +++ b/llama_stack/providers/adapters/inference/databricks/databricks.py @@ -48,7 +48,14 @@ class DatabricksInferenceAdapter(ModelRegistryHelper, Inference): async def shutdown(self) -> None: pass - def completion(self, request: CompletionRequest) -> AsyncGenerator: + def completion( + self, + model: str, + content: InterleavedTextMedia, + sampling_params: Optional[SamplingParams] = SamplingParams(), + stream: Optional[bool] = False, + logprobs: Optional[LogProbConfig] = None, + ) -> AsyncGenerator: raise NotImplementedError() def chat_completion( diff --git a/llama_stack/providers/impls/meta_reference/safety/llama_guard.py b/llama_stack/providers/impls/meta_reference/safety/llama_guard.py index 19a20a899..a6f450fae 100644 --- a/llama_stack/providers/impls/meta_reference/safety/llama_guard.py +++ b/llama_stack/providers/impls/meta_reference/safety/llama_guard.py @@ -170,7 +170,7 @@ class LlamaGuardShield(ShieldBase): for i in range(1, len(messages)): if messages[i].role == messages[i - 1].role: raise ValueError( - f"Messages must alternate between user and assistant. Message {i} has the same role as message {i-1}" + f"Messages must alternate between user and assistant. Message {i} has the same role as message {i - 1}" ) return messages diff --git a/llama_stack/providers/impls/vllm/__init__.py b/llama_stack/providers/impls/vllm/__init__.py index 3d5a81ad9..aa0c4b101 100644 --- a/llama_stack/providers/impls/vllm/__init__.py +++ b/llama_stack/providers/impls/vllm/__init__.py @@ -1,3 +1,9 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ from typing import Any from .config import VLLMConfig From 05282d12349d9c1034b93f5833e4c8840aa48c15 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Fri, 11 Oct 2024 13:03:59 -0400 Subject: [PATCH 02/40] Enable pre-commit on main branch (#237) --- .github/workflows/pre-commit.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 502753976..871e91f4a 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -1,6 +1,9 @@ name: Pre-commit -on: [pull_request] +on: + pull_request: + push: + branches: [main] jobs: pre-commit: From a2b87ed0cb5be83022e13f99bf0b5c17a6524072 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Fri, 11 Oct 2024 14:09:11 -0400 Subject: [PATCH 03/40] Switch to pre-commit/action (#239) --- .github/workflows/pre-commit.yml | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 871e91f4a..dd1a5c6cd 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -22,27 +22,4 @@ jobs: **/requirements*.txt .pre-commit-config.yaml - - name: Install pre-commit - run: | - python -m pip install --upgrade pip - pip install pre-commit - - - name: Fetch base branch - run: git fetch origin ${{ github.event.pull_request.base.ref }}:refs/remotes/origin/${{ github.event.pull_request.base.ref }} - - - name: Fetch head commit from PR - run: git fetch origin pull/${{ github.event.pull_request.number }}/head:pr-${{ github.event.pull_request.number }} - - - name: Get changed files - id: changed-files - run: | - git diff --name-only origin/${{ github.event.pull_request.base.ref }} pr-${{ github.event.pull_request.number }} > changed_files.txt - cat changed_files.txt - - - name: Run pre-commit - run: | - if [ -s changed_files.txt ]; then - pre-commit run --files $(cat changed_files.txt | tr '\n' ' ') - else - echo "No changed files to run pre-commit on." 
- fi + - uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd #v3.0.1 From 209cd3d35ed34c7db550969539d9b7d29e09f01b Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Mon, 14 Oct 2024 11:13:04 -0700 Subject: [PATCH 04/40] Bump version to 0.0.42 --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 3a24cff21..767f06be8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ blobfile fire httpx huggingface-hub -llama-models>=0.0.41 +llama-models>=0.0.42 prompt-toolkit python-dotenv pydantic>=2 diff --git a/setup.py b/setup.py index 4f4ea7713..466ca655f 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ def read_requirements(): setup( name="llama_stack", - version="0.0.41", + version="0.0.42", author="Meta Llama", author_email="llama-oss@meta.com", description="Llama Stack", From 80ada04f768071aa2ffc8630b15a822d395c07db Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Tue, 15 Oct 2024 16:03:17 -0400 Subject: [PATCH 05/40] Remove request arg from chat completion response processing (#240) Signed-off-by: Yuan Tang --- .../providers/adapters/inference/databricks/databricks.py | 4 ++-- .../providers/adapters/inference/fireworks/fireworks.py | 4 ++-- llama_stack/providers/adapters/inference/ollama/ollama.py | 4 ++-- llama_stack/providers/adapters/inference/tgi/tgi.py | 4 ++-- .../providers/adapters/inference/together/together.py | 4 ++-- llama_stack/providers/impls/vllm/vllm.py | 4 ++-- llama_stack/providers/utils/inference/openai_compat.py | 8 ++------ 7 files changed, 14 insertions(+), 18 deletions(-) diff --git a/llama_stack/providers/adapters/inference/databricks/databricks.py b/llama_stack/providers/adapters/inference/databricks/databricks.py index 7e8263dbf..141051186 100644 --- a/llama_stack/providers/adapters/inference/databricks/databricks.py +++ b/llama_stack/providers/adapters/inference/databricks/databricks.py @@ -91,7 +91,7 @@ class DatabricksInferenceAdapter(ModelRegistryHelper, Inference): ) -> ChatCompletionResponse: params = self._get_params(request) r = client.completions.create(**params) - return process_chat_completion_response(request, r, self.formatter) + return process_chat_completion_response(r, self.formatter) async def _stream_chat_completion( self, request: ChatCompletionRequest, client: OpenAI @@ -105,7 +105,7 @@ class DatabricksInferenceAdapter(ModelRegistryHelper, Inference): stream = _to_async_generator() async for chunk in process_chat_completion_stream_response( - request, stream, self.formatter + stream, self.formatter ): yield chunk diff --git a/llama_stack/providers/adapters/inference/fireworks/fireworks.py b/llama_stack/providers/adapters/inference/fireworks/fireworks.py index c85ee00f9..c82012cba 100644 --- a/llama_stack/providers/adapters/inference/fireworks/fireworks.py +++ b/llama_stack/providers/adapters/inference/fireworks/fireworks.py @@ -94,7 +94,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference): ) -> ChatCompletionResponse: params = self._get_params(request) r = await client.completion.acreate(**params) - return process_chat_completion_response(request, r, self.formatter) + return process_chat_completion_response(r, self.formatter) async def _stream_chat_completion( self, request: ChatCompletionRequest, client: Fireworks @@ -103,7 +103,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference): stream = client.completion.acreate(**params) async for chunk in process_chat_completion_stream_response( - request, 
stream, self.formatter + stream, self.formatter ): yield chunk diff --git a/llama_stack/providers/adapters/inference/ollama/ollama.py b/llama_stack/providers/adapters/inference/ollama/ollama.py index acf154627..c50c869fd 100644 --- a/llama_stack/providers/adapters/inference/ollama/ollama.py +++ b/llama_stack/providers/adapters/inference/ollama/ollama.py @@ -143,7 +143,7 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate): response = OpenAICompatCompletionResponse( choices=[choice], ) - return process_chat_completion_response(request, response, self.formatter) + return process_chat_completion_response(response, self.formatter) async def _stream_chat_completion( self, request: ChatCompletionRequest @@ -163,7 +163,7 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate): stream = _generate_and_convert_to_openai_compat() async for chunk in process_chat_completion_stream_response( - request, stream, self.formatter + stream, self.formatter ): yield chunk diff --git a/llama_stack/providers/adapters/inference/tgi/tgi.py b/llama_stack/providers/adapters/inference/tgi/tgi.py index 835649d94..cd0afad0c 100644 --- a/llama_stack/providers/adapters/inference/tgi/tgi.py +++ b/llama_stack/providers/adapters/inference/tgi/tgi.py @@ -116,7 +116,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate): response = OpenAICompatCompletionResponse( choices=[choice], ) - return process_chat_completion_response(request, response, self.formatter) + return process_chat_completion_response(response, self.formatter) async def _stream_chat_completion( self, request: ChatCompletionRequest @@ -135,7 +135,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate): stream = _generate_and_convert_to_openai_compat() async for chunk in process_chat_completion_stream_response( - request, stream, self.formatter + stream, self.formatter ): yield chunk diff --git a/llama_stack/providers/adapters/inference/together/together.py b/llama_stack/providers/adapters/inference/together/together.py index 3231f4657..750ca126e 100644 --- a/llama_stack/providers/adapters/inference/together/together.py +++ b/llama_stack/providers/adapters/inference/together/together.py @@ -108,7 +108,7 @@ class TogetherInferenceAdapter( ) -> ChatCompletionResponse: params = self._get_params(request) r = client.completions.create(**params) - return process_chat_completion_response(request, r, self.formatter) + return process_chat_completion_response(r, self.formatter) async def _stream_chat_completion( self, request: ChatCompletionRequest, client: Together @@ -123,7 +123,7 @@ class TogetherInferenceAdapter( stream = _to_async_generator() async for chunk in process_chat_completion_stream_response( - request, stream, self.formatter + stream, self.formatter ): yield chunk diff --git a/llama_stack/providers/impls/vllm/vllm.py b/llama_stack/providers/impls/vllm/vllm.py index e0b063ac9..5cdb1a2ab 100644 --- a/llama_stack/providers/impls/vllm/vllm.py +++ b/llama_stack/providers/impls/vllm/vllm.py @@ -207,7 +207,7 @@ class VLLMInferenceImpl(ModelRegistryHelper, Inference): response = OpenAICompatCompletionResponse( choices=[choice], ) - return process_chat_completion_response(request, response, self.formatter) + return process_chat_completion_response(response, self.formatter) async def _stream_chat_completion( self, request: ChatCompletionRequest, results_generator: AsyncGenerator @@ -229,7 +229,7 @@ class VLLMInferenceImpl(ModelRegistryHelper, Inference): stream = _generate_and_convert_to_openai_compat() async for chunk in 
process_chat_completion_stream_response( - request, stream, self.formatter + stream, self.formatter ): yield chunk diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index 118880b29..72db7b18c 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -50,9 +50,7 @@ def text_from_choice(choice) -> str: def process_chat_completion_response( - request: ChatCompletionRequest, - response: OpenAICompatCompletionResponse, - formatter: ChatFormat, + response: OpenAICompatCompletionResponse, formatter: ChatFormat ) -> ChatCompletionResponse: choice = response.choices[0] @@ -78,9 +76,7 @@ def process_chat_completion_response( async def process_chat_completion_stream_response( - request: ChatCompletionRequest, - stream: AsyncGenerator[OpenAICompatCompletionResponse, None], - formatter: ChatFormat, + stream: AsyncGenerator[OpenAICompatCompletionResponse, None], formatter: ChatFormat ) -> AsyncGenerator: yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( From 09b793c4d6e981abf2140b35ec9560ce90fa73be Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Tue, 15 Oct 2024 13:57:01 -0700 Subject: [PATCH 06/40] Fix fp8 implementation which had bit-rotten a bit I only tested with "on-the-fly" bf16 -> fp8 conversion, not the "load from fp8" codepath. YAML I tested with: ``` providers: - provider_id: quantized provider_type: meta-reference-quantized config: model: Llama3.1-8B-Instruct quantization: type: fp8 ``` --- .../impls/meta_reference/inference/generation.py | 5 ++--- .../meta_reference/inference/quantization/loader.py | 12 ++++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/llama_stack/providers/impls/meta_reference/inference/generation.py b/llama_stack/providers/impls/meta_reference/inference/generation.py index 8d94a20d1..9037b9acd 100644 --- a/llama_stack/providers/impls/meta_reference/inference/generation.py +++ b/llama_stack/providers/impls/meta_reference/inference/generation.py @@ -138,7 +138,7 @@ class Llama: else: model = Transformer(model_args) model.load_state_dict(state_dict, strict=False) - model = convert_to_quantized_model(model, config) + model = convert_to_quantized_model(model, config, ckpt_dir) else: if torch.cuda.is_bf16_supported(): torch.set_default_tensor_type(torch.cuda.BFloat16Tensor) @@ -228,8 +228,7 @@ class Llama: ignore_index=pad_id, ) - stop_tokens = torch.tensor(self.tokenizer.stop_tokens) - + stop_tokens = torch.tensor(self.tokenizer.stop_tokens, device="cuda") for cur_pos in range(min_prompt_len, total_len): if is_vision: position_ids = torch.arange( diff --git a/llama_stack/providers/impls/meta_reference/inference/quantization/loader.py b/llama_stack/providers/impls/meta_reference/inference/quantization/loader.py index 92b3a6ce3..bd59fe618 100644 --- a/llama_stack/providers/impls/meta_reference/inference/quantization/loader.py +++ b/llama_stack/providers/impls/meta_reference/inference/quantization/loader.py @@ -13,9 +13,10 @@ from typing import Optional import torch from fairscale.nn.model_parallel.mappings import reduce_from_model_parallel_region - from llama_models.datatypes import CheckpointQuantizationFormat from llama_models.llama3.reference_impl.model import Transformer, TransformerBlock + +from llama_models.sku_list import resolve_model from termcolor import cprint from torch import Tensor @@ -39,6 +40,7 @@ def swiglu_wrapper( def convert_to_quantized_model( model: 
Transformer, config: MetaReferenceQuantizedInferenceConfig, + checkpoint_dir: str, fp8_activation_scale_ub: Optional[float] = 1200.0, ) -> Transformer: if config.quantization.type == QuantizationType.bf16.value: @@ -49,12 +51,14 @@ def convert_to_quantized_model( from .fp8_impls import Fp8ScaledWeights, load_fp8, quantize_fp8 - checkpoint = config.checkpoint_config.checkpoint + llama_model = resolve_model(config.model) + assert llama_model is not None, f"Model {config.model} not found" + # Move weights to GPU with quantization - if checkpoint.quantization_format == CheckpointQuantizationFormat.fp8_mixed.value: + if llama_model.quantization_format == CheckpointQuantizationFormat.fp8_mixed.value: cprint("Loading fp8 scales...", "yellow") fp8_scales_path = os.path.join( - checkpoint.checkpoint_dir, f"fp8_scales_{get_model_parallel_rank()}.pt" + checkpoint_dir, f"fp8_scales_{get_model_parallel_rank()}.pt" ) assert os.path.isfile( fp8_scales_path From 770647dedec753a3da3cd0f4261e5a8287ed79ae Mon Sep 17 00:00:00 2001 From: Matthieu FRONTON Date: Wed, 16 Oct 2024 00:41:49 +0200 Subject: [PATCH 07/40] Fix broken rendering in Google Colab (#247) --- docs/getting_started.ipynb | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/getting_started.ipynb b/docs/getting_started.ipynb index c2e7326e7..00633b1d3 100644 --- a/docs/getting_started.ipynb +++ b/docs/getting_started.ipynb @@ -43,11 +43,9 @@ "For this purpose, we will directly work with pre-built docker containers and use the python SDK\n", "```\n", "$ git clone https://github.com/meta-llama/llama-stack-apps.git\n", - "\n", "$ cd llama-stack-apps\n", "$ yes | conda create -n stack-test python=3.10 \n", "$ conda activate stack-test\n", - "\n", "$ pip install llama_stack llama_stack_client\n", "```\n", "This will install `llama_stack` and `llama_stack_client` packages. \n", From c4d5d6bb91035f0904b100b64c6167aea2c5e2e2 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Tue, 15 Oct 2024 16:32:53 -0700 Subject: [PATCH 08/40] Docker compose scripts for remote adapters (#241) * tgi docker compose * path * wait for tgi server to start before starting server * update provider-id * move scripts to distribution/ folder * add readme * readme --- llama_stack/distribution/docker/README.md | 28 ++++++++++ .../distribution/docker/tgi/compose.yaml | 55 +++++++++++++++++++ .../distribution/docker/tgi/tgi-run.yaml | 46 ++++++++++++++++ 3 files changed, 129 insertions(+) create mode 100644 llama_stack/distribution/docker/README.md create mode 100644 llama_stack/distribution/docker/tgi/compose.yaml create mode 100644 llama_stack/distribution/docker/tgi/tgi-run.yaml diff --git a/llama_stack/distribution/docker/README.md b/llama_stack/distribution/docker/README.md new file mode 100644 index 000000000..962a07def --- /dev/null +++ b/llama_stack/distribution/docker/README.md @@ -0,0 +1,28 @@ +# Docker Compose Scripts + +This folder contains scripts to enable starting a distribution using `docker compose`. + + +#### Example: TGI Inference Adapter +``` +$ cd llama_stack/distribution/docker/tgi +$ ls +compose.yaml tgi-run.yaml +$ docker compose up +``` + +The script will first start up TGI server, then start up Llama Stack distribution server hooking up to the remote TGI provider for inference. 
You should be able to see the following outputs -- +``` +[text-generation-inference] | 2024-10-15T18:56:33.810397Z INFO text_generation_router::server: router/src/server.rs:1813: Using config Some(Llama) +[text-generation-inference] | 2024-10-15T18:56:33.810448Z WARN text_generation_router::server: router/src/server.rs:1960: Invalid hostname, defaulting to 0.0.0.0 +[text-generation-inference] | 2024-10-15T18:56:33.864143Z INFO text_generation_router::server: router/src/server.rs:2353: Connected +INFO: Started server process [1] +INFO: Waiting for application startup. +INFO: Application startup complete. +INFO: Uvicorn running on http://[::]:5000 (Press CTRL+C to quit) +``` + +To kill the server +``` +docker compose down +``` diff --git a/llama_stack/distribution/docker/tgi/compose.yaml b/llama_stack/distribution/docker/tgi/compose.yaml new file mode 100644 index 000000000..d5bcd50f3 --- /dev/null +++ b/llama_stack/distribution/docker/tgi/compose.yaml @@ -0,0 +1,55 @@ +services: + text-generation-inference: + image: ghcr.io/huggingface/text-generation-inference:latest + network_mode: "host" + volumes: + - $HOME/.cache/huggingface:/data + ports: + - "5009:5009" + devices: + - nvidia.com/gpu=all + environment: + - CUDA_VISIBLE_DEVICES=0 + - HF_HOME=/data + - HF_DATASETS_CACHE=/data + - HF_MODULES_CACHE=/data + - HF_HUB_CACHE=/data + command: ["--dtype", "bfloat16", "--usage-stats", "on", "--sharded", "false", "--model-id", "meta-llama/Llama-3.1-8B-Instruct", "--port", "5009", "--cuda-memory-fraction", "0.3"] + deploy: + resources: + reservations: + devices: + - driver: nvidia + # that's the closest analogue to --gpus; provide + # an integer amount of devices or 'all' + count: 1 + # Devices are reserved using a list of capabilities, making + # capabilities the only required field. A device MUST + # satisfy all the requested capabilities for a successful + # reservation. 
+ capabilities: [gpu] + runtime: nvidia + healthcheck: + test: ["CMD", "curl", "-f", "http://text-generation-inference:5009/health"] + interval: 5s + timeout: 5s + retries: 30 + llamastack-local-cpu: + depends_on: + text-generation-inference: + condition: service_healthy + image: llamastack-local-cpu + network_mode: "host" + volumes: + - ~/.llama:/root/.llama + # Link to TGI run.yaml file + - ./tgi-run.yaml:/root/llamastack-run-tgi.yaml + ports: + - "5000:5000" + # Hack: wait for TGI server to start before starting docker + entrypoint: bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/llamastack-run-tgi.yaml" + restart_policy: + condition: on-failure + delay: 3s + max_attempts: 5 + window: 60s diff --git a/llama_stack/distribution/docker/tgi/tgi-run.yaml b/llama_stack/distribution/docker/tgi/tgi-run.yaml new file mode 100644 index 000000000..dc8cb2d2d --- /dev/null +++ b/llama_stack/distribution/docker/tgi/tgi-run.yaml @@ -0,0 +1,46 @@ +version: '2' +built_at: '2024-10-08T17:40:45.325529' +image_name: local +docker_image: null +conda_env: local +apis: +- shields +- agents +- models +- memory +- memory_banks +- inference +- safety +providers: + inference: + - provider_id: tgi0 + provider_type: remote::tgi + config: + url: http://127.0.0.1:5009 + safety: + - provider_id: meta0 + provider_type: meta-reference + config: + llama_guard_shield: + model: Llama-Guard-3-1B + excluded_categories: [] + disable_input_check: false + disable_output_check: false + prompt_guard_shield: + model: Prompt-Guard-86M + memory: + - provider_id: meta0 + provider_type: meta-reference + config: {} + agents: + - provider_id: meta0 + provider_type: meta-reference + config: + persistence_store: + namespace: null + type: sqlite + db_path: ~/.llama/runtime/kvstore.db + telemetry: + - provider_id: meta0 + provider_type: meta-reference + config: {} From 319a6b5f8348e92807d4ae82a13d76ec94be4623 Mon Sep 17 00:00:00 2001 From: ATH Date: Wed, 16 Oct 2024 21:05:36 -0400 Subject: [PATCH 09/40] Update getting_started.md (#260) --- docs/getting_started.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index 6c8c902c0..18b1218a4 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -69,7 +69,7 @@ You may also checkout this [notebook](https://github.com/meta-llama/llama-stack/ #### Via docker ``` -docker run -it -p 5000:5000 -v ~/.llama:/root/.llama --gpus=all llamastack-local-gpu +docker run -it -p 5000:5000 -v ~/.llama:/root/.llama --gpus=all llamastack/llamastack-local-gpu ``` > [!NOTE] From a07dfffbbfd42215f0a5e52e45e5c69e49950697 Mon Sep 17 00:00:00 2001 From: Tam <45498088+tamdogood@users.noreply.github.com> Date: Wed, 16 Oct 2024 23:15:59 -0700 Subject: [PATCH 10/40] initial changes (#261) Update the parsing logic for comma-separated list and download function --- llama_stack/cli/download.py | 40 +++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/llama_stack/cli/download.py b/llama_stack/cli/download.py index a1495cbf0..4a0f88aaa 100644 --- a/llama_stack/cli/download.py +++ b/llama_stack/cli/download.py @@ -152,27 +152,29 @@ def run_download_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser): parser.error("Please provide a model id") return - prompt_guard = prompt_guard_model_sku() - if args.model_id == prompt_guard.model_id: - model = prompt_guard - info = prompt_guard_download_info() - else: - model = resolve_model(args.model_id) - if model 
is None: - parser.error(f"Model {args.model_id} not found") - return - info = llama_meta_net_info(model) + # Check if model_id is a comma-separated list + model_ids = [model_id.strip() for model_id in args.model_id.split(",")] - if args.source == "huggingface": - _hf_download(model, args.hf_token, args.ignore_patterns, parser) - else: - meta_url = args.meta_url - if not meta_url: - meta_url = input( - "Please provide the signed URL you received via email after visiting https://www.llama.com/llama-downloads/ (e.g., https://llama3-1.llamameta.net/*?Policy...): " + prompt_guard = prompt_guard_model_sku() + for model_id in model_ids: + if model_id == prompt_guard.model_id: + model = prompt_guard + info = prompt_guard_download_info() + else: + model = resolve_model(model_id) + if model is None: + parser.error(f"Model {model_id} not found") + continue + info = llama_meta_net_info(model) + + if args.source == "huggingface": + _hf_download(model, args.hf_token, args.ignore_patterns, parser) + else: + meta_url = args.meta_url or input( + f"Please provide the signed URL for model {model_id} you received via email after visiting https://www.llama.com/llama-downloads/ (e.g., https://llama3-1.llamameta.net/*?Policy...): " ) - assert meta_url is not None and "llamameta.net" in meta_url - _meta_download(model, meta_url, info) + assert "llamameta.net" in meta_url + _meta_download(model, meta_url, info) class ModelEntry(BaseModel): From d787d1e84fd03ed9a0c5f578c016ec269a2d1970 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Wed, 16 Oct 2024 23:25:10 -0700 Subject: [PATCH 11/40] config templates restructure, docs (#262) * wip * config templates * readmes --- README.md | 6 +- docs/getting_started.md | 62 +++++-------------- .../local-bedrock-conda-example-build.yaml | 0 .../local-cpu-docker-build.yaml} | 0 .../local-databricks-build.yaml | 0 .../local-fireworks-build.yaml | 0 .../local-gpu-docker-build.yaml} | 4 +- .../local-hf-endpoint-build.yaml | 0 .../local-hf-serverless-build.yaml | 0 .../local-ollama-build.yaml | 0 .../{ => build_configs}/local-tgi-build.yaml | 0 .../local-tgi-chroma-docker-build.yaml} | 8 +-- .../local-together-build.yaml | 0 .../{ => build_configs}/local-vllm-build.yaml | 0 .../run.yaml => run_configs/local-run.yaml} | 25 +++++--- .../local-tgi-run.yaml} | 30 +++++---- 16 files changed, 57 insertions(+), 78 deletions(-) rename llama_stack/distribution/templates/{ => build_configs}/local-bedrock-conda-example-build.yaml (100%) rename llama_stack/distribution/templates/{docker/llamastack-local-cpu/build.yaml => build_configs/local-cpu-docker-build.yaml} (100%) rename llama_stack/distribution/templates/{ => build_configs}/local-databricks-build.yaml (100%) rename llama_stack/distribution/templates/{ => build_configs}/local-fireworks-build.yaml (100%) rename llama_stack/distribution/templates/{local-build.yaml => build_configs/local-gpu-docker-build.yaml} (87%) rename llama_stack/distribution/templates/{ => build_configs}/local-hf-endpoint-build.yaml (100%) rename llama_stack/distribution/templates/{ => build_configs}/local-hf-serverless-build.yaml (100%) rename llama_stack/distribution/templates/{ => build_configs}/local-ollama-build.yaml (100%) rename llama_stack/distribution/templates/{ => build_configs}/local-tgi-build.yaml (100%) rename llama_stack/distribution/templates/{docker/llamastack-local-gpu/build.yaml => build_configs/local-tgi-chroma-docker-build.yaml} (53%) rename llama_stack/distribution/templates/{ => build_configs}/local-together-build.yaml (100%) rename 
llama_stack/distribution/templates/{ => build_configs}/local-vllm-build.yaml (100%) rename llama_stack/distribution/templates/{docker/llamastack-local-gpu/run.yaml => run_configs/local-run.yaml} (71%) rename llama_stack/distribution/templates/{docker/llamastack-local-cpu/run.yaml => run_configs/local-tgi-run.yaml} (60%) diff --git a/README.md b/README.md index 238475840..fef556a73 100644 --- a/README.md +++ b/README.md @@ -90,10 +90,10 @@ The `llama` CLI makes it easy to work with the Llama Stack set of tools. Please * [CLI reference](docs/cli_reference.md) * Guide using `llama` CLI to work with Llama models (download, study prompts), and building/starting a Llama Stack distribution. * [Getting Started](docs/getting_started.md) - * Guide to build and run a Llama Stack server. + * Guide to start a Llama Stack server. + * [Jupyter notebook](./docs/getting_started.ipynb) to walk-through how to use simple text and vision inference llama_stack_client APIs * [Contributing](CONTRIBUTING.md) - ## Llama Stack Client SDK | **Language** | **Client SDK** | **Package** | @@ -104,3 +104,5 @@ The `llama` CLI makes it easy to work with the Llama Stack set of tools. Please | Kotlin | [llama-stack-client-kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) | Check out our client SDKs for connecting to Llama Stack server in your preferred language, you can choose from [python](https://github.com/meta-llama/llama-stack-client-python), [node](https://github.com/meta-llama/llama-stack-client-node), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) programming languages to quickly build your applications. + +You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo. diff --git a/docs/getting_started.md b/docs/getting_started.md index 18b1218a4..0e0db0201 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -1,45 +1,9 @@ -# llama-stack - -[![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-stack)](https://pypi.org/project/llama-stack/) -[![Discord](https://img.shields.io/discord/1257833999603335178)](https://discord.gg/llama-stack) - -This repository contains the specifications and implementations of the APIs which are part of the Llama Stack. - -The Llama Stack defines and standardizes the building blocks needed to bring generative AI applications to market. These blocks span the entire development lifecycle: from model training and fine-tuning, through product evaluation, to invoking AI agents in production. Beyond definition, we're developing open-source versions and partnering with cloud providers, ensuring developers can assemble AI solutions using consistent, interlocking pieces across platforms. The ultimate goal is to accelerate innovation in the AI space. - -The Stack APIs are rapidly improving, but still very much work in progress and we invite feedback as well as direct contributions. - - -## APIs - -The Llama Stack consists of the following set of APIs: - -- Inference -- Safety -- Memory -- Agentic System -- Evaluation -- Post Training -- Synthetic Data Generation -- Reward Scoring - -Each of the APIs themselves is a collection of REST endpoints. - -## API Providers - -A Provider is what makes the API real -- they provide the actual implementation backing the API. 
- -As an example, for Inference, we could have the implementation be backed by open source libraries like `[ torch | vLLM | TensorRT ]` as possible options. - -A provider can also be just a pointer to a remote REST service -- for example, cloud providers or dedicated inference providers could serve these APIs. - - -## Llama Stack Distribution - -A Distribution is where APIs and Providers are assembled together to provide a consistent whole to the end application developer. You can mix-and-match providers -- some could be backed by local code and some could be remote. As a hobbyist, you can serve a small model locally, but can choose a cloud provider for a large model. Regardless, the higher level APIs your app needs to work with don't need to change at all. You can even imagine moving across the server / mobile-device boundary as well always using the same uniform set of APIs for developing Generative AI applications. +# Getting Started with Llama Stack +This guide will walk you though the steps to get started on end-to-end flow for LlamaStack. This guide mainly focuses on getting started with building a LlamaStack distribution, and starting up a LlamaStack server. ## Installation +The `llama` CLI tool helps you setup and use the Llama toolchain & agentic systems. It should be available on your path after installing the `llama-stack` package. You can install this repository as a [package](https://pypi.org/project/llama-stack/) with `pip install llama-stack` @@ -57,17 +21,14 @@ cd llama-stack $CONDA_PREFIX/bin/pip install -e . ``` -# Getting Started +For what you can do with the Llama CLI, please refer to [CLI Reference](./cli_reference.md). -The `llama` CLI tool helps you setup and use the Llama toolchain & agentic systems. It should be available on your path after installing the `llama-stack` package. - -This guides allows you to quickly get started with building and running a Llama Stack server in < 5 minutes! - -You may also checkout this [notebook](https://github.com/meta-llama/llama-stack/blob/main/docs/getting_started.ipynb) for trying out out demo scripts. +## Starting the Llama Stack Server ## Quick Cheatsheet +This guides allows you to quickly get started with building and running a Llama Stack server in < 5 minutes! -#### Via docker +#### Starting up server via docker ``` docker run -it -p 5000:5000 -v ~/.llama:/root/.llama --gpus=all llamastack/llamastack-local-gpu ``` @@ -75,8 +36,12 @@ docker run -it -p 5000:5000 -v ~/.llama:/root/.llama --gpus=all llamastack/llama > [!NOTE] > `~/.llama` should be the path containing downloaded weights of Llama models. +> [!TIP] +> Pro Tip: We may use `docker compose up` for starting up a distribution with remote providers (e.g. TGI). You can checkout [these scripts](../llama_stack/distribution/docker/README.md) to help you get started. + +#### Build->Configure->Run via conda +You may also build a LlamaStack distribution from scratch, configure it, and start running the distribution. This is useful for developing on LlamaStack. -#### Via conda **`llama stack build`** - You'll be prompted to enter build information interactively. 
``` @@ -445,4 +410,7 @@ Similarly you can test safety (if you configured llama-guard and/or prompt-guard python -m llama_stack.apis.safety.client localhost 5000 ``` + +Check out our client SDKs for connecting to Llama Stack server in your preferred language, you can choose from [python](https://github.com/meta-llama/llama-stack-client-python), [node](https://github.com/meta-llama/llama-stack-client-node), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) programming languages to quickly build your applications. + You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo. diff --git a/llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml b/llama_stack/distribution/templates/build_configs/local-bedrock-conda-example-build.yaml similarity index 100% rename from llama_stack/distribution/templates/local-bedrock-conda-example-build.yaml rename to llama_stack/distribution/templates/build_configs/local-bedrock-conda-example-build.yaml diff --git a/llama_stack/distribution/templates/docker/llamastack-local-cpu/build.yaml b/llama_stack/distribution/templates/build_configs/local-cpu-docker-build.yaml similarity index 100% rename from llama_stack/distribution/templates/docker/llamastack-local-cpu/build.yaml rename to llama_stack/distribution/templates/build_configs/local-cpu-docker-build.yaml diff --git a/llama_stack/distribution/templates/local-databricks-build.yaml b/llama_stack/distribution/templates/build_configs/local-databricks-build.yaml similarity index 100% rename from llama_stack/distribution/templates/local-databricks-build.yaml rename to llama_stack/distribution/templates/build_configs/local-databricks-build.yaml diff --git a/llama_stack/distribution/templates/local-fireworks-build.yaml b/llama_stack/distribution/templates/build_configs/local-fireworks-build.yaml similarity index 100% rename from llama_stack/distribution/templates/local-fireworks-build.yaml rename to llama_stack/distribution/templates/build_configs/local-fireworks-build.yaml diff --git a/llama_stack/distribution/templates/local-build.yaml b/llama_stack/distribution/templates/build_configs/local-gpu-docker-build.yaml similarity index 87% rename from llama_stack/distribution/templates/local-build.yaml rename to llama_stack/distribution/templates/build_configs/local-gpu-docker-build.yaml index f10461256..01af1021e 100644 --- a/llama_stack/distribution/templates/local-build.yaml +++ b/llama_stack/distribution/templates/build_configs/local-gpu-docker-build.yaml @@ -1,4 +1,4 @@ -name: local +name: local-gpu distribution_spec: description: Use code from `llama_stack` itself to serve all llama stack APIs providers: @@ -7,4 +7,4 @@ distribution_spec: safety: meta-reference agents: meta-reference telemetry: meta-reference -image_type: conda +image_type: docker diff --git a/llama_stack/distribution/templates/local-hf-endpoint-build.yaml b/llama_stack/distribution/templates/build_configs/local-hf-endpoint-build.yaml similarity index 100% rename from llama_stack/distribution/templates/local-hf-endpoint-build.yaml rename to llama_stack/distribution/templates/build_configs/local-hf-endpoint-build.yaml diff --git a/llama_stack/distribution/templates/local-hf-serverless-build.yaml b/llama_stack/distribution/templates/build_configs/local-hf-serverless-build.yaml similarity index 100% rename from 
llama_stack/distribution/templates/local-hf-serverless-build.yaml rename to llama_stack/distribution/templates/build_configs/local-hf-serverless-build.yaml diff --git a/llama_stack/distribution/templates/local-ollama-build.yaml b/llama_stack/distribution/templates/build_configs/local-ollama-build.yaml similarity index 100% rename from llama_stack/distribution/templates/local-ollama-build.yaml rename to llama_stack/distribution/templates/build_configs/local-ollama-build.yaml diff --git a/llama_stack/distribution/templates/local-tgi-build.yaml b/llama_stack/distribution/templates/build_configs/local-tgi-build.yaml similarity index 100% rename from llama_stack/distribution/templates/local-tgi-build.yaml rename to llama_stack/distribution/templates/build_configs/local-tgi-build.yaml diff --git a/llama_stack/distribution/templates/docker/llamastack-local-gpu/build.yaml b/llama_stack/distribution/templates/build_configs/local-tgi-chroma-docker-build.yaml similarity index 53% rename from llama_stack/distribution/templates/docker/llamastack-local-gpu/build.yaml rename to llama_stack/distribution/templates/build_configs/local-tgi-chroma-docker-build.yaml index 11d1ac01c..30715c551 100644 --- a/llama_stack/distribution/templates/docker/llamastack-local-gpu/build.yaml +++ b/llama_stack/distribution/templates/build_configs/local-tgi-chroma-docker-build.yaml @@ -1,11 +1,11 @@ -name: local-gpu +name: local-tgi-chroma distribution_spec: - description: local meta reference + description: remote tgi inference + chromadb memory docker_image: null providers: - inference: meta-reference + inference: remote::tgi safety: meta-reference agents: meta-reference - memory: meta-reference + memory: remote::chromadb telemetry: meta-reference image_type: docker diff --git a/llama_stack/distribution/templates/local-together-build.yaml b/llama_stack/distribution/templates/build_configs/local-together-build.yaml similarity index 100% rename from llama_stack/distribution/templates/local-together-build.yaml rename to llama_stack/distribution/templates/build_configs/local-together-build.yaml diff --git a/llama_stack/distribution/templates/local-vllm-build.yaml b/llama_stack/distribution/templates/build_configs/local-vllm-build.yaml similarity index 100% rename from llama_stack/distribution/templates/local-vllm-build.yaml rename to llama_stack/distribution/templates/build_configs/local-vllm-build.yaml diff --git a/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml b/llama_stack/distribution/templates/run_configs/local-run.yaml similarity index 71% rename from llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml rename to llama_stack/distribution/templates/run_configs/local-run.yaml index 8fb02711b..7abf2b4dc 100644 --- a/llama_stack/distribution/templates/docker/llamastack-local-gpu/run.yaml +++ b/llama_stack/distribution/templates/run_configs/local-run.yaml @@ -1,16 +1,16 @@ version: '2' -built_at: '2024-10-08T17:42:33.690666' -image_name: local-gpu -docker_image: local-gpu -conda_env: null +built_at: '2024-10-08T17:40:45.325529' +image_name: local +docker_image: null +conda_env: local apis: -- memory -- inference -- agents - shields -- safety +- agents - models +- memory - memory_banks +- inference +- safety providers: inference: - provider_id: meta-reference @@ -25,8 +25,13 @@ providers: - provider_id: meta-reference provider_type: meta-reference config: - llama_guard_shield: null - prompt_guard_shield: null + llama_guard_shield: + model: Llama-Guard-3-1B + excluded_categories: [] + 
disable_input_check: false + disable_output_check: false + prompt_guard_shield: + model: Prompt-Guard-86M memory: - provider_id: meta-reference provider_type: meta-reference diff --git a/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml b/llama_stack/distribution/templates/run_configs/local-tgi-run.yaml similarity index 60% rename from llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml rename to llama_stack/distribution/templates/run_configs/local-tgi-run.yaml index 6b107d972..ec3af742c 100644 --- a/llama_stack/distribution/templates/docker/llamastack-local-cpu/run.yaml +++ b/llama_stack/distribution/templates/run_configs/local-tgi-run.yaml @@ -1,29 +1,33 @@ version: '2' -built_at: '2024-10-08T17:42:07.505267' -image_name: local-cpu -docker_image: local-cpu -conda_env: null +built_at: '2024-10-08T17:40:45.325529' +image_name: local +docker_image: null +conda_env: local apis: +- shields - agents -- inference - models - memory -- safety -- shields - memory_banks +- inference +- safety providers: inference: - - provider_id: remote::ollama - provider_type: remote::ollama + - provider_id: tgi0 + provider_type: remote::tgi config: - host: localhost - port: 6000 + url: http://127.0.0.1:5009 safety: - provider_id: meta-reference provider_type: meta-reference config: - llama_guard_shield: null - prompt_guard_shield: null + llama_guard_shield: + model: Llama-Guard-3-1B + excluded_categories: [] + disable_input_check: false + disable_output_check: false + prompt_guard_shield: + model: Prompt-Guard-86M memory: - provider_id: meta-reference provider_type: meta-reference From 7cc47da8f28d410407b293aae241e9edbcedcf00 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Wed, 16 Oct 2024 23:50:31 -0700 Subject: [PATCH 12/40] Update getting_started.md --- docs/getting_started.md | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index 0e0db0201..157fbb0ff 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -29,15 +29,31 @@ For what you can do with the Llama CLI, please refer to [CLI Reference](./cli_re This guides allows you to quickly get started with building and running a Llama Stack server in < 5 minutes! #### Starting up server via docker + +We provide 2 pre-built Docker image of Llama Stack distribution, which can be found in the following links. +- [llamastack-local-gpu](https://hub.docker.com/repository/docker/llamastack/llamastack-local-gpu/general) + - This is a packaged version with our local meta-reference implementations, where you will be running inference locally with downloaded Llama model checkpoints. +- [llamastack-local-cpu](https://hub.docker.com/repository/docker/llamastack/llamastack-local-cpu/general) + - This is a lite version with remote inference where you can hook up to your favourite remote inference framework (e.g. ollama, fireworks, together, tgi) for running inference without GPU. + +> [!NOTE] +> For GPU inference, you need to set these environment variables for specifying local directory containing your model checkpoints, and enable GPU inference to start running docker container. ``` -docker run -it -p 5000:5000 -v ~/.llama:/root/.llama --gpus=all llamastack/llamastack-local-gpu +export LLAMA_CHECKPOINT_DIR=~/.llama ``` > [!NOTE] > `~/.llama` should be the path containing downloaded weights of Llama models. 
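As a minimal sketch (not part of this patch), the `LLAMA_CHECKPOINT_DIR` variable exported above can also be reused directly in the volume mount, so the exported path and the mounted path stay in sync; the image name and port are the same ones used elsewhere in this guide:

```
export LLAMA_CHECKPOINT_DIR=~/.llama
docker run -it -p 5000:5000 -v $LLAMA_CHECKPOINT_DIR:/root/.llama --gpus=all llamastack/llamastack-local-gpu
```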
+ +To download and start running a pre-built docker container, you may use the following commands: + +``` +docker run -it -p 5000:5000 -v ~/.llama:/root/.llama --gpus=all llamastack/llamastack-local-gpu +``` + > [!TIP] -> Pro Tip: We may use `docker compose up` for starting up a distribution with remote providers (e.g. TGI). You can checkout [these scripts](../llama_stack/distribution/docker/README.md) to help you get started. +> Pro Tip: We may use `docker compose up` for starting up a distribution with remote providers (e.g. TGI) using [llamastack-local-cpu](https://hub.docker.com/repository/docker/llamastack/llamastack-local-cpu/general). You can checkout [these scripts](../llama_stack/distribution/docker/README.md) to help you get started. #### Build->Configure->Run via conda You may also build a LlamaStack distribution from scratch, configure it, and start running the distribution. This is useful for developing on LlamaStack. From cf9e5b76b23980ece0d73f897a71e9a2855416bd Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Wed, 16 Oct 2024 23:52:29 -0700 Subject: [PATCH 13/40] Update getting_started.md --- docs/getting_started.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index 157fbb0ff..71fb39c60 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -23,10 +23,7 @@ $CONDA_PREFIX/bin/pip install -e . For what you can do with the Llama CLI, please refer to [CLI Reference](./cli_reference.md). -## Starting the Llama Stack Server - -## Quick Cheatsheet -This guides allows you to quickly get started with building and running a Llama Stack server in < 5 minutes! +## Quick Starting Llama Stack Server #### Starting up server via docker @@ -55,7 +52,7 @@ docker run -it -p 5000:5000 -v ~/.llama:/root/.llama --gpus=all llamastack/llama > [!TIP] > Pro Tip: We may use `docker compose up` for starting up a distribution with remote providers (e.g. TGI) using [llamastack-local-cpu](https://hub.docker.com/repository/docker/llamastack/llamastack-local-cpu/general). You can checkout [these scripts](../llama_stack/distribution/docker/README.md) to help you get started. -#### Build->Configure->Run via conda +#### Build->Configure->Run Llama Stack server via conda You may also build a LlamaStack distribution from scratch, configure it, and start running the distribution. This is useful for developing on LlamaStack. **`llama stack build`** @@ -163,6 +160,7 @@ INFO: Application startup complete. INFO: Uvicorn running on http://[::]:5000 (Press CTRL+C to quit) ``` +## Building a Distribution ## Step 1. Build In the following steps, imagine we'll be working with a `Meta-Llama3.1-8B-Instruct` model. We will name our build `8b-instruct` to help us remember the config. We will start build our distribution (in the form of a Conda environment, or Docker image). In this step, we will specify: From 02be26098af231c6e5cbe76735917c22d7859928 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Wed, 16 Oct 2024 23:56:21 -0700 Subject: [PATCH 14/40] getting started --- docs/getting_started.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index 71fb39c60..3eebf8bbc 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -1,6 +1,6 @@ # Getting Started with Llama Stack -This guide will walk you though the steps to get started on end-to-end flow for LlamaStack. 
This guide mainly focuses on getting started with building a LlamaStack distribution, and starting up a LlamaStack server. +This guide will walk you though the steps to get started on end-to-end flow for LlamaStack. This guide mainly focuses on getting started with building a LlamaStack distribution, and starting up a LlamaStack server. Please see our [documentations](../README.md) on what you can do with Llama Stack, and [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main) on examples apps built with Llama Stack. ## Installation The `llama` CLI tool helps you setup and use the Llama toolchain & agentic systems. It should be available on your path after installing the `llama-stack` package. From 9fcf5d58e0aefea19700344424745d45c08e1ddf Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Thu, 17 Oct 2024 10:03:27 -0700 Subject: [PATCH 15/40] Allow overriding MODEL_IDS for inference test --- .../providers/tests/inference/test_inference.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/llama_stack/providers/tests/inference/test_inference.py b/llama_stack/providers/tests/inference/test_inference.py index 0afc894cf..581a0d428 100644 --- a/llama_stack/providers/tests/inference/test_inference.py +++ b/llama_stack/providers/tests/inference/test_inference.py @@ -5,6 +5,7 @@ # the root directory of this source tree. import itertools +import os import pytest import pytest_asyncio @@ -50,14 +51,17 @@ def get_expected_stop_reason(model: str): return StopReason.end_of_message if "Llama3.1" in model else StopReason.end_of_turn +if "MODEL_IDS" not in os.environ: + MODEL_IDS = [Llama_8B, Llama_3B] +else: + MODEL_IDS = os.environ["MODEL_IDS"].split(",") + + # This is going to create multiple Stack impls without tearing down the previous one # Fix that! 
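For reference, a usage sketch for the `MODEL_IDS` override added in this patch; the exact model descriptors and pytest flags shown are assumptions for illustration, not taken from the diff:

```
# run the inference tests against one explicitly chosen model
MODEL_IDS=Llama3.1-8B-Instruct pytest -s llama_stack/providers/tests/inference/test_inference.py

# or a comma-separated list, matching os.environ["MODEL_IDS"].split(",")
MODEL_IDS=Llama3.1-8B-Instruct,Llama3.2-3B-Instruct pytest -s llama_stack/providers/tests/inference/test_inference.py
```

When `MODEL_IDS` is unset, the fixture keeps its previous behavior and parametrizes over the built-in 8B and 3B models.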
@pytest_asyncio.fixture( scope="session", - params=[ - {"model": Llama_8B}, - {"model": Llama_3B}, - ], + params=[{"model": m} for m in MODEL_IDS], ids=lambda d: d["model"], ) async def inference_settings(request): From be3c5c034d2f94d1816bb719e3776f13cbc4896a Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Thu, 17 Oct 2024 17:28:17 -0700 Subject: [PATCH 16/40] [bugfix] fix case for agent when memory bank registered without specifying provider_id (#264) * fix case where memory bank is registered without provider_id * memory test * agents unit test --- llama_stack/apis/memory_banks/client.py | 15 +++ .../distribution/routers/routing_tables.py | 10 +- .../tests/agents/provider_config_example.yaml | 2 +- .../providers/tests/agents/test_agents.py | 101 ++++++++++++++++++ .../tests/memory/provider_config_example.yaml | 4 +- .../providers/tests/memory/test_memory.py | 24 +++++ 6 files changed, 151 insertions(+), 5 deletions(-) diff --git a/llama_stack/apis/memory_banks/client.py b/llama_stack/apis/memory_banks/client.py index 588a93fe2..69be35d02 100644 --- a/llama_stack/apis/memory_banks/client.py +++ b/llama_stack/apis/memory_banks/client.py @@ -92,6 +92,21 @@ async def run_main(host: str, port: int, stream: bool): response = await client.list_memory_banks() cprint(f"list_memory_banks response={response}", "green") + # register memory bank for the first time + response = await client.register_memory_bank( + VectorMemoryBankDef( + identifier="test_bank2", + embedding_model="all-MiniLM-L6-v2", + chunk_size_in_tokens=512, + overlap_size_in_tokens=64, + ) + ) + cprint(f"register_memory_bank response={response}", "blue") + + # list again after registering + response = await client.list_memory_banks() + cprint(f"list_memory_banks response={response}", "green") + def main(host: str, port: int, stream: bool = True): asyncio.run(run_main(host, port, stream)) diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index 17755f0e4..ede30aea1 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -110,10 +110,16 @@ class CommonRoutingTableImpl(RoutingTable): async def register_object(self, obj: RoutableObjectWithProvider): entries = self.registry.get(obj.identifier, []) for entry in entries: - if entry.provider_id == obj.provider_id: - print(f"`{obj.identifier}` already registered with `{obj.provider_id}`") + if entry.provider_id == obj.provider_id or not obj.provider_id: + print( + f"`{obj.identifier}` already registered with `{entry.provider_id}`" + ) return + # if provider_id is not specified, we'll pick an arbitrary one from existing entries + if not obj.provider_id and len(self.impls_by_provider_id) > 0: + obj.provider_id = list(self.impls_by_provider_id.keys())[0] + if obj.provider_id not in self.impls_by_provider_id: raise ValueError(f"Provider `{obj.provider_id}` not found") diff --git a/llama_stack/providers/tests/agents/provider_config_example.yaml b/llama_stack/providers/tests/agents/provider_config_example.yaml index 5b643590c..58f05e29a 100644 --- a/llama_stack/providers/tests/agents/provider_config_example.yaml +++ b/llama_stack/providers/tests/agents/provider_config_example.yaml @@ -31,4 +31,4 @@ providers: persistence_store: namespace: null type: sqlite - db_path: /Users/ashwin/.llama/runtime/kvstore.db + db_path: ~/.llama/runtime/kvstore.db diff --git a/llama_stack/providers/tests/agents/test_agents.py b/llama_stack/providers/tests/agents/test_agents.py index 
edcc6adea..6774d3f1f 100644 --- a/llama_stack/providers/tests/agents/test_agents.py +++ b/llama_stack/providers/tests/agents/test_agents.py @@ -64,6 +64,24 @@ def search_query_messages(): ] +@pytest.fixture +def attachment_message(): + return [ + UserMessage( + content="I am attaching some documentation for Torchtune. Help me answer questions I will ask next.", + ), + ] + + +@pytest.fixture +def query_attachment_messages(): + return [ + UserMessage( + content="What are the top 5 topics that were explained? Only list succinct bullet points." + ), + ] + + @pytest.mark.asyncio async def test_create_agent_turn(agents_settings, sample_messages): agents_impl = agents_settings["impl"] @@ -123,6 +141,89 @@ async def test_create_agent_turn(agents_settings, sample_messages): assert len(final_event.turn.output_message.content) > 0 +@pytest.mark.asyncio +async def test_rag_agent_as_attachments( + agents_settings, attachment_message, query_attachment_messages +): + urls = [ + "memory_optimizations.rst", + "chat.rst", + "llama3.rst", + "datasets.rst", + "qat_finetune.rst", + "lora_finetune.rst", + ] + + attachments = [ + Attachment( + content=f"https://raw.githubusercontent.com/pytorch/torchtune/main/docs/source/tutorials/{url}", + mime_type="text/plain", + ) + for i, url in enumerate(urls) + ] + + agents_impl = agents_settings["impl"] + + agent_config = AgentConfig( + model=agents_settings["common_params"]["model"], + instructions=agents_settings["common_params"]["instructions"], + enable_session_persistence=True, + sampling_params=SamplingParams(temperature=0.7, top_p=0.95), + input_shields=[], + output_shields=[], + tools=[ + MemoryToolDefinition( + memory_bank_configs=[], + query_generator_config={ + "type": "default", + "sep": " ", + }, + max_tokens_in_context=4096, + max_chunks=10, + ), + ], + max_infer_iters=5, + ) + + create_response = await agents_impl.create_agent(agent_config) + agent_id = create_response.agent_id + + # Create a session + session_create_response = await agents_impl.create_agent_session( + agent_id, "Test Session" + ) + session_id = session_create_response.session_id + + # Create and execute a turn + turn_request = dict( + agent_id=agent_id, + session_id=session_id, + messages=attachment_message, + attachments=attachments, + stream=True, + ) + + turn_response = [ + chunk async for chunk in agents_impl.create_agent_turn(**turn_request) + ] + + assert len(turn_response) > 0 + + # Create a second turn querying the agent + turn_request = dict( + agent_id=agent_id, + session_id=session_id, + messages=query_attachment_messages, + stream=True, + ) + + turn_response = [ + chunk async for chunk in agents_impl.create_agent_turn(**turn_request) + ] + + assert len(turn_response) > 0 + + @pytest.mark.asyncio async def test_create_agent_turn_with_brave_search( agents_settings, search_query_messages diff --git a/llama_stack/providers/tests/memory/provider_config_example.yaml b/llama_stack/providers/tests/memory/provider_config_example.yaml index cac1adde5..5b5440f8d 100644 --- a/llama_stack/providers/tests/memory/provider_config_example.yaml +++ b/llama_stack/providers/tests/memory/provider_config_example.yaml @@ -2,8 +2,8 @@ providers: - provider_id: test-faiss provider_type: meta-reference config: {} - - provider_id: test-chroma - provider_type: remote::chroma + - provider_id: test-chromadb + provider_type: remote::chromadb config: host: localhost port: 6001 diff --git a/llama_stack/providers/tests/memory/test_memory.py b/llama_stack/providers/tests/memory/test_memory.py index 
c5ebdf9c7..d92feaba8 100644 --- a/llama_stack/providers/tests/memory/test_memory.py +++ b/llama_stack/providers/tests/memory/test_memory.py @@ -89,6 +89,30 @@ async def test_banks_list(memory_settings): assert len(response) == 0 +@pytest.mark.asyncio +async def test_banks_register(memory_settings): + # NOTE: this needs you to ensure that you are starting from a clean state + # but so far we don't have an unregister API unfortunately, so be careful + banks_impl = memory_settings["memory_banks_impl"] + bank = VectorMemoryBankDef( + identifier="test_bank_no_provider", + embedding_model="all-MiniLM-L6-v2", + chunk_size_in_tokens=512, + overlap_size_in_tokens=64, + ) + + await banks_impl.register_memory_bank(bank) + response = await banks_impl.list_memory_banks() + assert isinstance(response, list) + assert len(response) == 1 + + # register same memory bank with same id again will fail + await banks_impl.register_memory_bank(bank) + response = await banks_impl.list_memory_banks() + assert isinstance(response, list) + assert len(response) == 1 + + @pytest.mark.asyncio async def test_query_documents(memory_settings, sample_documents): memory_impl = memory_settings["memory_impl"] From 33afd34e6f557d2a9aae762a590cc75c89bcc029 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Fri, 18 Oct 2024 12:51:10 -0700 Subject: [PATCH 17/40] Add an option to not use elastic agents for meta-reference inference (#269) --- .../impls/meta_reference/inference/config.py | 7 +++- .../meta_reference/inference/inference.py | 34 +++++++++++++++---- 2 files changed, 33 insertions(+), 8 deletions(-) diff --git a/llama_stack/providers/impls/meta_reference/inference/config.py b/llama_stack/providers/impls/meta_reference/inference/config.py index 901a8c7fb..4e1161ced 100644 --- a/llama_stack/providers/impls/meta_reference/inference/config.py +++ b/llama_stack/providers/impls/meta_reference/inference/config.py @@ -17,13 +17,18 @@ from llama_stack.providers.utils.inference import supported_inference_models class MetaReferenceInferenceConfig(BaseModel): model: str = Field( - default="Llama3.1-8B-Instruct", + default="Llama3.2-3B-Instruct", description="Model descriptor from `llama model list`", ) torch_seed: Optional[int] = None max_seq_len: int = 4096 max_batch_size: int = 1 + # when this is False, we assume that the distributed process group is setup by someone + # outside of this code (e.g., when run inside `torchrun`). that is useful for clients + # (including our testing code) who might be using llama-stack as a library. + create_distributed_process_group: bool = True + @field_validator("model") @classmethod def validate_model(cls, model: str) -> str: diff --git a/llama_stack/providers/impls/meta_reference/inference/inference.py b/llama_stack/providers/impls/meta_reference/inference/inference.py index 6696762c9..7edc279d0 100644 --- a/llama_stack/providers/impls/meta_reference/inference/inference.py +++ b/llama_stack/providers/impls/meta_reference/inference/inference.py @@ -18,6 +18,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import ( ) from .config import MetaReferenceInferenceConfig +from .generation import Llama from .model_parallel import LlamaModelParallelGenerator # there's a single model parallel process running serving the model. 
for now, @@ -36,8 +37,11 @@ class MetaReferenceInferenceImpl(Inference, ModelsProtocolPrivate): async def initialize(self) -> None: print(f"Loading model `{self.model.descriptor()}`") - self.generator = LlamaModelParallelGenerator(self.config) - self.generator.start() + if self.config.create_distributed_process_group: + self.generator = LlamaModelParallelGenerator(self.config) + self.generator.start() + else: + self.generator = Llama.build(self.config) async def register_model(self, model: ModelDef) -> None: raise ValueError("Dynamic model registration is not supported") @@ -51,7 +55,8 @@ class MetaReferenceInferenceImpl(Inference, ModelsProtocolPrivate): ] async def shutdown(self) -> None: - self.generator.stop() + if self.config.create_distributed_process_group: + self.generator.stop() def completion( self, @@ -99,8 +104,9 @@ class MetaReferenceInferenceImpl(Inference, ModelsProtocolPrivate): f"Model mismatch: {request.model} != {self.model.descriptor()}" ) - if SEMAPHORE.locked(): - raise RuntimeError("Only one concurrent request is supported") + if self.config.create_distributed_process_group: + if SEMAPHORE.locked(): + raise RuntimeError("Only one concurrent request is supported") if request.stream: return self._stream_chat_completion(request) @@ -110,7 +116,7 @@ class MetaReferenceInferenceImpl(Inference, ModelsProtocolPrivate): async def _nonstream_chat_completion( self, request: ChatCompletionRequest ) -> ChatCompletionResponse: - async with SEMAPHORE: + def impl(): messages = chat_completion_request_to_messages(request) tokens = [] @@ -154,10 +160,16 @@ class MetaReferenceInferenceImpl(Inference, ModelsProtocolPrivate): logprobs=logprobs if request.logprobs else None, ) + if self.config.create_distributed_process_group: + async with SEMAPHORE: + return impl() + else: + return impl() + async def _stream_chat_completion( self, request: ChatCompletionRequest ) -> AsyncGenerator: - async with SEMAPHORE: + def impl(): messages = chat_completion_request_to_messages(request) yield ChatCompletionResponseStreamChunk( @@ -272,6 +284,14 @@ class MetaReferenceInferenceImpl(Inference, ModelsProtocolPrivate): ) ) + if self.config.create_distributed_process_group: + async with SEMAPHORE: + for x in impl(): + yield x + else: + for x in impl(): + yield x + async def embeddings( self, model: str, From 71a905e93f06b7779d37755d0c8831513f54cb8f Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Fri, 18 Oct 2024 14:28:06 -0700 Subject: [PATCH 18/40] Allow overridding checkpoint_dir via config --- .../impls/meta_reference/inference/config.py | 4 ++++ .../meta_reference/inference/generation.py | 21 +++++++++++-------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/llama_stack/providers/impls/meta_reference/inference/config.py b/llama_stack/providers/impls/meta_reference/inference/config.py index 4e1161ced..48cba645b 100644 --- a/llama_stack/providers/impls/meta_reference/inference/config.py +++ b/llama_stack/providers/impls/meta_reference/inference/config.py @@ -29,6 +29,10 @@ class MetaReferenceInferenceConfig(BaseModel): # (including our testing code) who might be using llama-stack as a library. 
create_distributed_process_group: bool = True + # By default, the implementation will look at ~/.llama/checkpoints/ but you + # can override by specifying the directory explicitly + checkpoint_dir: Optional[str] = None + @field_validator("model") @classmethod def validate_model(cls, model: str) -> str: diff --git a/llama_stack/providers/impls/meta_reference/inference/generation.py b/llama_stack/providers/impls/meta_reference/inference/generation.py index 9037b9acd..20a8addc7 100644 --- a/llama_stack/providers/impls/meta_reference/inference/generation.py +++ b/llama_stack/providers/impls/meta_reference/inference/generation.py @@ -98,7 +98,10 @@ class Llama: sys.stdout = open(os.devnull, "w") start_time = time.time() - ckpt_dir = model_checkpoint_dir(model) + if config.checkpoint_dir: + ckpt_dir = config.checkpoint_dir + else: + ckpt_dir = model_checkpoint_dir(model) checkpoints = sorted(Path(ckpt_dir).glob("*.pth")) assert len(checkpoints) > 0, f"no checkpoint files found in {ckpt_dir}" @@ -119,9 +122,7 @@ class Llama: **params, ) - tokenizer_path = os.path.join(ckpt_dir, "tokenizer.model") - tokenizer = Tokenizer(model_path=tokenizer_path) - + tokenizer = Tokenizer.get_instance() assert ( model_args.vocab_size == tokenizer.n_words ), f"model_args vocab = {model_args.vocab_size} but tokenizer vocab = {tokenizer.n_words}" @@ -170,14 +171,16 @@ class Llama: logprobs: bool = False, echo: bool = False, include_stop_token: bool = False, + print_input_tokens: bool = False, ) -> Generator: params = self.model.params - # input_tokens = [ - # self.formatter.vision_token if t == 128256 else t - # for t in model_input.tokens - # ] - # cprint("Input to model -> " + self.tokenizer.decode(input_tokens), "red") + if print_input_tokens: + input_tokens = [ + self.formatter.vision_token if t == 128256 else t + for t in model_input.tokens + ] + cprint("Input to model -> " + self.tokenizer.decode(input_tokens), "red") prompt_tokens = [model_input.tokens] bsz = 1 From 95a96afe34136f060591df2509004cf9c98701b4 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Fri, 18 Oct 2024 14:41:38 -0700 Subject: [PATCH 19/40] Small rename --- llama_stack/distribution/resolver.py | 2 +- llama_stack/distribution/server/server.py | 4 ++-- llama_stack/providers/tests/resolver.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/llama_stack/distribution/resolver.py b/llama_stack/distribution/resolver.py index a05e08cd7..78d76e977 100644 --- a/llama_stack/distribution/resolver.py +++ b/llama_stack/distribution/resolver.py @@ -55,7 +55,7 @@ class ProviderWithSpec(Provider): # TODO: this code is not very straightforward to follow and needs one more round of refactoring -async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, Any]: +async def resolve_impls(run_config: StackRunConfig) -> Dict[Api, Any]: """ Does two things: - flatmaps, sorts and resolves the providers in dependency order diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index eba89e393..6154432b6 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -37,7 +37,7 @@ from llama_stack.providers.utils.telemetry.tracing import ( from llama_stack.distribution.datatypes import * # noqa: F403 from llama_stack.distribution.request_headers import set_request_provider_data -from llama_stack.distribution.resolver import resolve_impls_with_routing +from llama_stack.distribution.resolver import resolve_impls from .endpoints import 
get_all_api_endpoints @@ -276,7 +276,7 @@ def main( app = FastAPI() - impls = asyncio.run(resolve_impls_with_routing(config)) + impls = asyncio.run(resolve_impls(config)) if Api.telemetry in impls: setup_logger(impls[Api.telemetry]) diff --git a/llama_stack/providers/tests/resolver.py b/llama_stack/providers/tests/resolver.py index fabb245e7..de672b6dc 100644 --- a/llama_stack/providers/tests/resolver.py +++ b/llama_stack/providers/tests/resolver.py @@ -14,7 +14,7 @@ import yaml from llama_stack.distribution.datatypes import * # noqa: F403 from llama_stack.distribution.configure import parse_and_maybe_upgrade_config from llama_stack.distribution.request_headers import set_request_provider_data -from llama_stack.distribution.resolver import resolve_impls_with_routing +from llama_stack.distribution.resolver import resolve_impls async def resolve_impls_for_test(api: Api, deps: List[Api] = None): @@ -36,7 +36,7 @@ async def resolve_impls_for_test(api: Api, deps: List[Api] = None): providers=chosen, ) run_config = parse_and_maybe_upgrade_config(run_config) - impls = await resolve_impls_with_routing(run_config) + impls = await resolve_impls(run_config) if "provider_data" in config_dict: provider_id = chosen[api.value][0].provider_id From 2089427d60be9f17d8de9cadd1e6c0c6cef253fd Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Fri, 18 Oct 2024 20:50:59 -0700 Subject: [PATCH 20/40] Make all methods `async def` again; add completion() for meta-reference (#270) PR #201 had made several changes while trying to fix issues with getting the stream=False branches of inference and agents API working. As part of this, it made a change which was slightly gratuitous. Namely, making chat_completion() and brethren "def" instead of "async def". The rationale was that this allowed the user (within llama-stack) of this to use it as: ``` async for chunk in api.chat_completion(params) ``` However, it causes unnecessary confusion for several folks. Given that clients (e.g., llama-stack-apps) anyway use the SDK methods (which are completely isolated) this choice was not ideal. Let's revert back so the call now looks like: ``` async for chunk in await api.chat_completion(params) ``` Bonus: Added a completion() implementation for the meta-reference provider. 
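For reference, a caller-side sketch of the reverted convention (run inside an async function; `inference_api`, `messages`, and the model name are illustrative stand-ins, not part of this PR — the pattern mirrors the updated tests):

```
# stream=False: awaiting the call returns the response object directly
response = await inference_api.chat_completion(
    model="Llama3.1-8B-Instruct",
    messages=messages,
    stream=False,
)

# stream=True: awaiting the call returns an async generator of stream chunks
async for chunk in await inference_api.chat_completion(
    model="Llama3.1-8B-Instruct",
    messages=messages,
    stream=True,
):
    ...
```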
Technically should have been another PR :) --- docs/resources/llama-stack-spec.html | 78 +++++---- docs/resources/llama-stack-spec.yaml | 37 ++-- llama_stack/apis/agents/agents.py | 4 +- llama_stack/apis/agents/client.py | 6 +- llama_stack/apis/inference/client.py | 7 +- llama_stack/apis/inference/inference.py | 13 +- llama_stack/distribution/routers/routers.py | 12 +- .../adapters/inference/bedrock/bedrock.py | 4 +- .../inference/databricks/databricks.py | 6 +- .../adapters/inference/fireworks/fireworks.py | 6 +- .../adapters/inference/ollama/ollama.py | 6 +- .../providers/adapters/inference/tgi/tgi.py | 6 +- .../adapters/inference/together/together.py | 4 +- .../meta_reference/agents/agent_instance.py | 2 +- .../impls/meta_reference/agents/agents.py | 2 +- .../meta_reference/inference/generation.py | 51 +++--- .../meta_reference/inference/inference.py | 160 ++++++++++++++---- .../inference/model_parallel.py | 50 +++--- .../inference/parallel_utils.py | 30 ++-- .../meta_reference/safety/llama_guard.py | 2 +- llama_stack/providers/impls/vllm/vllm.py | 6 +- .../providers/tests/agents/test_agents.py | 8 +- .../tests/inference/test_inference.py | 43 ++++- 23 files changed, 330 insertions(+), 213 deletions(-) diff --git a/docs/resources/llama-stack-spec.html b/docs/resources/llama-stack-spec.html index a2f92b6e4..8e6683931 100644 --- a/docs/resources/llama-stack-spec.html +++ b/docs/resources/llama-stack-spec.html @@ -21,7 +21,7 @@ "info": { "title": "[DRAFT] Llama Stack Specification", "version": "0.0.1", - "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-10-10 15:29:56.831109" + "description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-10-18 20:48:17.730988" }, "servers": [ { @@ -2830,8 +2830,11 @@ "CompletionResponse": { "type": "object", "properties": { - "completion_message": { - "$ref": "#/components/schemas/CompletionMessage" + "content": { + "type": "string" + }, + "stop_reason": { + "$ref": "#/components/schemas/StopReason" }, "logprobs": { "type": "array", @@ -2842,7 +2845,8 @@ }, "additionalProperties": false, "required": [ - "completion_message" + "content", + "stop_reason" ], "title": "Completion response." 
}, @@ -6075,49 +6079,49 @@ ], "tags": [ { - "name": "Evaluations" - }, - { - "name": "Inspect" + "name": "Models" }, { "name": "RewardScoring" }, { - "name": "Datasets" - }, - { - "name": "Models" - }, - { - "name": "Telemetry" - }, - { - "name": "PostTraining" - }, - { - "name": "SyntheticDataGeneration" - }, - { - "name": "BatchInference" - }, - { - "name": "Inference" - }, - { - "name": "Agents" - }, - { - "name": "Memory" - }, - { - "name": "Safety" + "name": "MemoryBanks" }, { "name": "Shields" }, { - "name": "MemoryBanks" + "name": "SyntheticDataGeneration" + }, + { + "name": "Inference" + }, + { + "name": "Inspect" + }, + { + "name": "BatchInference" + }, + { + "name": "Memory" + }, + { + "name": "Datasets" + }, + { + "name": "Agents" + }, + { + "name": "PostTraining" + }, + { + "name": "Telemetry" + }, + { + "name": "Safety" + }, + { + "name": "Evaluations" }, { "name": "BuiltinTool", diff --git a/docs/resources/llama-stack-spec.yaml b/docs/resources/llama-stack-spec.yaml index c9822d6ca..906d3934a 100644 --- a/docs/resources/llama-stack-spec.yaml +++ b/docs/resources/llama-stack-spec.yaml @@ -501,14 +501,17 @@ components: CompletionResponse: additionalProperties: false properties: - completion_message: - $ref: '#/components/schemas/CompletionMessage' + content: + type: string logprobs: items: $ref: '#/components/schemas/TokenLogProbs' type: array + stop_reason: + $ref: '#/components/schemas/StopReason' required: - - completion_message + - content + - stop_reason title: Completion response. type: object CompletionResponseStreamChunk: @@ -2507,7 +2510,7 @@ info: description: "This is the specification of the llama stack that provides\n \ \ a set of endpoints and their corresponding interfaces that are tailored\ \ to\n best leverage Llama Models. The specification is still in\ - \ draft and subject to change.\n Generated at 2024-10-10 15:29:56.831109" + \ draft and subject to change.\n Generated at 2024-10-18 20:48:17.730988" title: '[DRAFT] Llama Stack Specification' version: 0.0.1 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema @@ -3712,21 +3715,21 @@ security: servers: - url: http://any-hosted-llama-stack.com tags: -- name: Evaluations -- name: Inspect -- name: RewardScoring -- name: Datasets - name: Models -- name: Telemetry -- name: PostTraining -- name: SyntheticDataGeneration -- name: BatchInference -- name: Inference -- name: Agents -- name: Memory -- name: Safety -- name: Shields +- name: RewardScoring - name: MemoryBanks +- name: Shields +- name: SyntheticDataGeneration +- name: Inference +- name: Inspect +- name: BatchInference +- name: Memory +- name: Datasets +- name: Agents +- name: PostTraining +- name: Telemetry +- name: Safety +- name: Evaluations - description: name: BuiltinTool - description: AgentCreateResponse: ... - # This method is not `async def` because it can result in either an - # `AsyncGenerator` or a `AgentTurnCreateResponse` depending on the value of `stream`. 
@webmethod(route="/agents/turn/create") - def create_agent_turn( + async def create_agent_turn( self, agent_id: str, session_id: str, diff --git a/llama_stack/apis/agents/client.py b/llama_stack/apis/agents/client.py index 32bc9abdd..b45447328 100644 --- a/llama_stack/apis/agents/client.py +++ b/llama_stack/apis/agents/client.py @@ -67,14 +67,14 @@ class AgentsClient(Agents): response.raise_for_status() return AgentSessionCreateResponse(**response.json()) - def create_agent_turn( + async def create_agent_turn( self, request: AgentTurnCreateRequest, ) -> AsyncGenerator: if request.stream: return self._stream_agent_turn(request) else: - return self._nonstream_agent_turn(request) + return await self._nonstream_agent_turn(request) async def _stream_agent_turn( self, request: AgentTurnCreateRequest @@ -126,7 +126,7 @@ async def _run_agent( for content in user_prompts: cprint(f"User> {content}", color="white", attrs=["bold"]) - iterator = api.create_agent_turn( + iterator = await api.create_agent_turn( AgentTurnCreateRequest( agent_id=create_response.agent_id, session_id=session_response.session_id, diff --git a/llama_stack/apis/inference/client.py b/llama_stack/apis/inference/client.py index 79d2cc02c..90636fa36 100644 --- a/llama_stack/apis/inference/client.py +++ b/llama_stack/apis/inference/client.py @@ -42,10 +42,10 @@ class InferenceClient(Inference): async def shutdown(self) -> None: pass - def completion(self, request: CompletionRequest) -> AsyncGenerator: + async def completion(self, request: CompletionRequest) -> AsyncGenerator: raise NotImplementedError() - def chat_completion( + async def chat_completion( self, model: str, messages: List[Message], @@ -139,7 +139,8 @@ async def run_main( else: logprobs_config = None - iterator = client.chat_completion( + assert stream, "Non streaming not supported here" + iterator = await client.chat_completion( model=model, messages=[message], stream=stream, diff --git a/llama_stack/apis/inference/inference.py b/llama_stack/apis/inference/inference.py index 588dd37ca..5895e528e 100644 --- a/llama_stack/apis/inference/inference.py +++ b/llama_stack/apis/inference/inference.py @@ -88,7 +88,8 @@ class CompletionRequest(BaseModel): class CompletionResponse(BaseModel): """Completion response.""" - completion_message: CompletionMessage + content: str + stop_reason: StopReason logprobs: Optional[List[TokenLogProbs]] = None @@ -113,7 +114,7 @@ class BatchCompletionRequest(BaseModel): class BatchCompletionResponse(BaseModel): """Batch completion response.""" - completion_message_batch: List[CompletionMessage] + batch: List[CompletionResponse] @json_schema_type @@ -165,7 +166,7 @@ class BatchChatCompletionRequest(BaseModel): @json_schema_type class BatchChatCompletionResponse(BaseModel): - completion_message_batch: List[CompletionMessage] + batch: List[ChatCompletionResponse] @json_schema_type @@ -181,10 +182,8 @@ class ModelStore(Protocol): class Inference(Protocol): model_store: ModelStore - # This method is not `async def` because it can result in either an - # `AsyncGenerator` or a `CompletionResponse` depending on the value of `stream`. @webmethod(route="/inference/completion") - def completion( + async def completion( self, model: str, content: InterleavedTextMedia, @@ -196,7 +195,7 @@ class Inference(Protocol): # This method is not `async def` because it can result in either an # `AsyncGenerator` or a `ChatCompletionResponse` depending on the value of `stream`. 
@webmethod(route="/inference/chat_completion") - def chat_completion( + async def chat_completion( self, model: str, messages: List[Message], diff --git a/llama_stack/distribution/routers/routers.py b/llama_stack/distribution/routers/routers.py index cf62da1d0..a78e808d0 100644 --- a/llama_stack/distribution/routers/routers.py +++ b/llama_stack/distribution/routers/routers.py @@ -70,7 +70,7 @@ class InferenceRouter(Inference): async def register_model(self, model: ModelDef) -> None: await self.routing_table.register_model(model) - def chat_completion( + async def chat_completion( self, model: str, messages: List[Message], @@ -93,11 +93,11 @@ class InferenceRouter(Inference): ) provider = self.routing_table.get_provider_impl(model) if stream: - return (chunk async for chunk in provider.chat_completion(**params)) + return (chunk async for chunk in await provider.chat_completion(**params)) else: - return provider.chat_completion(**params) + return await provider.chat_completion(**params) - def completion( + async def completion( self, model: str, content: InterleavedTextMedia, @@ -114,9 +114,9 @@ class InferenceRouter(Inference): logprobs=logprobs, ) if stream: - return (chunk async for chunk in provider.completion(**params)) + return (chunk async for chunk in await provider.completion(**params)) else: - return provider.completion(**params) + return await provider.completion(**params) async def embeddings( self, diff --git a/llama_stack/providers/adapters/inference/bedrock/bedrock.py b/llama_stack/providers/adapters/inference/bedrock/bedrock.py index 22f87ef6b..8440ecc20 100644 --- a/llama_stack/providers/adapters/inference/bedrock/bedrock.py +++ b/llama_stack/providers/adapters/inference/bedrock/bedrock.py @@ -47,7 +47,7 @@ class BedrockInferenceAdapter(ModelRegistryHelper, Inference): async def shutdown(self) -> None: self.client.close() - def completion( + async def completion( self, model: str, content: InterleavedTextMedia, @@ -283,7 +283,7 @@ class BedrockInferenceAdapter(ModelRegistryHelper, Inference): ) return tool_config - def chat_completion( + async def chat_completion( self, model: str, messages: List[Message], diff --git a/llama_stack/providers/adapters/inference/databricks/databricks.py b/llama_stack/providers/adapters/inference/databricks/databricks.py index 141051186..9f50ad227 100644 --- a/llama_stack/providers/adapters/inference/databricks/databricks.py +++ b/llama_stack/providers/adapters/inference/databricks/databricks.py @@ -48,7 +48,7 @@ class DatabricksInferenceAdapter(ModelRegistryHelper, Inference): async def shutdown(self) -> None: pass - def completion( + async def completion( self, model: str, content: InterleavedTextMedia, @@ -58,7 +58,7 @@ class DatabricksInferenceAdapter(ModelRegistryHelper, Inference): ) -> AsyncGenerator: raise NotImplementedError() - def chat_completion( + async def chat_completion( self, model: str, messages: List[Message], @@ -84,7 +84,7 @@ class DatabricksInferenceAdapter(ModelRegistryHelper, Inference): if stream: return self._stream_chat_completion(request, client) else: - return self._nonstream_chat_completion(request, client) + return await self._nonstream_chat_completion(request, client) async def _nonstream_chat_completion( self, request: ChatCompletionRequest, client: OpenAI diff --git a/llama_stack/providers/adapters/inference/fireworks/fireworks.py b/llama_stack/providers/adapters/inference/fireworks/fireworks.py index c82012cba..537f3a6b4 100644 --- a/llama_stack/providers/adapters/inference/fireworks/fireworks.py +++ 
b/llama_stack/providers/adapters/inference/fireworks/fireworks.py @@ -51,7 +51,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference): async def shutdown(self) -> None: pass - def completion( + async def completion( self, model: str, content: InterleavedTextMedia, @@ -61,7 +61,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference): ) -> AsyncGenerator: raise NotImplementedError() - def chat_completion( + async def chat_completion( self, model: str, messages: List[Message], @@ -87,7 +87,7 @@ class FireworksInferenceAdapter(ModelRegistryHelper, Inference): if stream: return self._stream_chat_completion(request, client) else: - return self._nonstream_chat_completion(request, client) + return await self._nonstream_chat_completion(request, client) async def _nonstream_chat_completion( self, request: ChatCompletionRequest, client: Fireworks diff --git a/llama_stack/providers/adapters/inference/ollama/ollama.py b/llama_stack/providers/adapters/inference/ollama/ollama.py index c50c869fd..3a3e4b451 100644 --- a/llama_stack/providers/adapters/inference/ollama/ollama.py +++ b/llama_stack/providers/adapters/inference/ollama/ollama.py @@ -84,7 +84,7 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate): return ret - def completion( + async def completion( self, model: str, content: InterleavedTextMedia, @@ -94,7 +94,7 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate): ) -> AsyncGenerator: raise NotImplementedError() - def chat_completion( + async def chat_completion( self, model: str, messages: List[Message], @@ -118,7 +118,7 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate): if stream: return self._stream_chat_completion(request) else: - return self._nonstream_chat_completion(request) + return await self._nonstream_chat_completion(request) def _get_params(self, request: ChatCompletionRequest) -> dict: return { diff --git a/llama_stack/providers/adapters/inference/tgi/tgi.py b/llama_stack/providers/adapters/inference/tgi/tgi.py index cd0afad0c..3c610099c 100644 --- a/llama_stack/providers/adapters/inference/tgi/tgi.py +++ b/llama_stack/providers/adapters/inference/tgi/tgi.py @@ -66,7 +66,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate): async def shutdown(self) -> None: pass - def completion( + async def completion( self, model: str, content: InterleavedTextMedia, @@ -76,7 +76,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate): ) -> AsyncGenerator: raise NotImplementedError() - def chat_completion( + async def chat_completion( self, model: str, messages: List[Message], @@ -101,7 +101,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate): if stream: return self._stream_chat_completion(request) else: - return self._nonstream_chat_completion(request) + return await self._nonstream_chat_completion(request) async def _nonstream_chat_completion( self, request: ChatCompletionRequest diff --git a/llama_stack/providers/adapters/inference/together/together.py b/llama_stack/providers/adapters/inference/together/together.py index 750ca126e..8c73d75ec 100644 --- a/llama_stack/providers/adapters/inference/together/together.py +++ b/llama_stack/providers/adapters/inference/together/together.py @@ -64,7 +64,7 @@ class TogetherInferenceAdapter( ) -> AsyncGenerator: raise NotImplementedError() - def chat_completion( + async def chat_completion( self, model: str, messages: List[Message], @@ -101,7 +101,7 @@ class TogetherInferenceAdapter( if stream: return self._stream_chat_completion(request, client) else: - return 
self._nonstream_chat_completion(request, client) + return await self._nonstream_chat_completion(request, client) async def _nonstream_chat_completion( self, request: ChatCompletionRequest, client: Together diff --git a/llama_stack/providers/impls/meta_reference/agents/agent_instance.py b/llama_stack/providers/impls/meta_reference/agents/agent_instance.py index 0d334fdad..cbc7490fd 100644 --- a/llama_stack/providers/impls/meta_reference/agents/agent_instance.py +++ b/llama_stack/providers/impls/meta_reference/agents/agent_instance.py @@ -424,7 +424,7 @@ class ChatAgent(ShieldRunnerMixin): stop_reason = None with tracing.span("inference"): - async for chunk in self.inference_api.chat_completion( + async for chunk in await self.inference_api.chat_completion( self.agent_config.model, input_messages, tools=self._get_tools(), diff --git a/llama_stack/providers/impls/meta_reference/agents/agents.py b/llama_stack/providers/impls/meta_reference/agents/agents.py index 5a209d0b7..8b3ece978 100644 --- a/llama_stack/providers/impls/meta_reference/agents/agents.py +++ b/llama_stack/providers/impls/meta_reference/agents/agents.py @@ -105,7 +105,7 @@ class MetaReferenceAgentsImpl(Agents): session_id=session_id, ) - def create_agent_turn( + async def create_agent_turn( self, agent_id: str, session_id: str, diff --git a/llama_stack/providers/impls/meta_reference/inference/generation.py b/llama_stack/providers/impls/meta_reference/inference/generation.py index 20a8addc7..9ca128176 100644 --- a/llama_stack/providers/impls/meta_reference/inference/generation.py +++ b/llama_stack/providers/impls/meta_reference/inference/generation.py @@ -23,11 +23,6 @@ from fairscale.nn.model_parallel.initialize import ( ) from llama_models.llama3.api.args import ModelArgs from llama_models.llama3.api.chat_format import ChatFormat, ModelInput -from llama_models.llama3.api.datatypes import ( - InterleavedTextMedia, - Message, - ToolPromptFormat, -) from llama_models.llama3.api.tokenizer import Tokenizer from llama_models.llama3.reference_impl.model import Transformer from llama_models.llama3.reference_impl.multimodal.model import ( @@ -38,7 +33,11 @@ from llama_models.sku_list import resolve_model from pydantic import BaseModel from termcolor import cprint +from llama_stack.apis.inference import * # noqa: F403 from llama_stack.distribution.utils.model_utils import model_local_dir +from llama_stack.providers.utils.inference.prompt_adapter import ( + chat_completion_request_to_messages, +) from .config import MetaReferenceInferenceConfig, MetaReferenceQuantizedInferenceConfig @@ -297,15 +296,12 @@ class Llama: if all(eos_reached): break - def text_completion( + def completion( self, - content: InterleavedTextMedia, - temperature: float = 0.6, - top_p: float = 0.9, - max_gen_len: Optional[int] = None, - logprobs: bool = False, - echo: bool = False, + request: CompletionRequest, ) -> Generator: + sampling_params = request.sampling_params + max_gen_len = sampling_params.max_tokens if ( max_gen_len is None or max_gen_len == 0 @@ -313,26 +309,25 @@ class Llama: ): max_gen_len = self.model.params.max_seq_len - 1 - model_input = self.formatter.encode_content(content) - + model_input = self.formatter.encode_content(request.content) yield from self.generate( model_input=model_input, max_gen_len=max_gen_len, - temperature=temperature, - top_p=top_p, - logprobs=logprobs, - echo=echo, + temperature=sampling_params.temperature, + top_p=sampling_params.top_p, + logprobs=bool(request.logprobs), + include_stop_token=True, + echo=False, ) def 
chat_completion( self, - messages: List[Message], - temperature: float = 0.6, - top_p: float = 0.9, - max_gen_len: Optional[int] = None, - logprobs: bool = False, - tool_prompt_format: ToolPromptFormat = ToolPromptFormat.json, + request: ChatCompletionRequest, ) -> Generator: + messages = chat_completion_request_to_messages(request) + + sampling_params = request.sampling_params + max_gen_len = sampling_params.max_tokens if ( max_gen_len is None or max_gen_len == 0 @@ -343,12 +338,12 @@ class Llama: yield from self.generate( model_input=self.formatter.encode_dialog_prompt( messages, - tool_prompt_format, + request.tool_prompt_format, ), max_gen_len=max_gen_len, - temperature=temperature, - top_p=top_p, - logprobs=logprobs, + temperature=sampling_params.temperature, + top_p=sampling_params.top_p, + logprobs=bool(request.logprobs), include_stop_token=True, ) diff --git a/llama_stack/providers/impls/meta_reference/inference/inference.py b/llama_stack/providers/impls/meta_reference/inference/inference.py index 7edc279d0..34053343e 100644 --- a/llama_stack/providers/impls/meta_reference/inference/inference.py +++ b/llama_stack/providers/impls/meta_reference/inference/inference.py @@ -13,9 +13,6 @@ from llama_models.sku_list import resolve_model from llama_models.llama3.api.datatypes import * # noqa: F403 from llama_stack.apis.inference import * # noqa: F403 from llama_stack.providers.datatypes import ModelDef, ModelsProtocolPrivate -from llama_stack.providers.utils.inference.prompt_adapter import ( - chat_completion_request_to_messages, -) from .config import MetaReferenceInferenceConfig from .generation import Llama @@ -58,7 +55,18 @@ class MetaReferenceInferenceImpl(Inference, ModelsProtocolPrivate): if self.config.create_distributed_process_group: self.generator.stop() - def completion( + def check_model(self, request) -> None: + model = resolve_model(request.model) + if model is None: + raise RuntimeError( + f"Unknown model: {request.model}, Run `llama model list`" + ) + elif model.descriptor() != self.model.descriptor(): + raise RuntimeError( + f"Model mismatch: {request.model} != {self.model.descriptor()}" + ) + + async def completion( self, model: str, content: InterleavedTextMedia, @@ -66,9 +74,114 @@ class MetaReferenceInferenceImpl(Inference, ModelsProtocolPrivate): stream: Optional[bool] = False, logprobs: Optional[LogProbConfig] = None, ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: - raise NotImplementedError() + if logprobs: + assert logprobs.top_k == 1, f"Unexpected top_k={logprobs.top_k}" - def chat_completion( + request = CompletionRequest( + model=model, + content=content, + sampling_params=sampling_params, + stream=stream, + logprobs=logprobs, + ) + self.check_model(request) + + if request.stream: + return self._stream_completion(request) + else: + return await self._nonstream_completion(request) + + async def _stream_completion(self, request: CompletionRequest) -> AsyncGenerator: + def impl(): + stop_reason = None + + for token_result in self.generator.completion(request): + if token_result.text == "<|eot_id|>": + stop_reason = StopReason.end_of_turn + text = "" + elif token_result.text == "<|eom_id|>": + stop_reason = StopReason.end_of_message + text = "" + else: + text = token_result.text + + logprobs = None + if stop_reason is None: + if request.logprobs: + assert len(token_result.logprobs) == 1 + + logprobs = [ + TokenLogProbs( + logprobs_by_token={ + token_result.text: token_result.logprobs[0] + } + ) + ] + + yield CompletionResponseStreamChunk( + 
delta=text, + stop_reason=stop_reason, + logprobs=logprobs if request.logprobs else None, + ) + + if stop_reason is None: + yield CompletionResponseStreamChunk( + delta="", + stop_reason=StopReason.out_of_tokens, + ) + + if self.config.create_distributed_process_group: + async with SEMAPHORE: + for x in impl(): + yield x + else: + for x in impl(): + yield x + + async def _nonstream_completion( + self, request: CompletionRequest + ) -> CompletionResponse: + def impl(): + tokens = [] + logprobs = [] + stop_reason = None + + tokenizer = self.generator.formatter.tokenizer + for token_result in self.generator.completion(request): + tokens.append(token_result.token) + + if token_result.token in tokenizer.stop_tokens: + # not quite right semantically + stop_reason = StopReason.end_of_turn + + if request.logprobs: + assert len(token_result.logprobs) == 1 + + logprobs.append( + TokenLogProbs( + logprobs_by_token={ + token_result.text: token_result.logprobs[0] + } + ) + ) + + if stop_reason is None: + stop_reason = StopReason.out_of_tokens + + content = self.generator.formatter.tokenizer.decode(tokens) + return CompletionResponse( + content=content, + stop_reason=stop_reason, + logprobs=logprobs if request.logprobs else None, + ) + + if self.config.create_distributed_process_group: + async with SEMAPHORE: + return impl() + else: + return impl() + + async def chat_completion( self, model: str, messages: List[Message], @@ -93,16 +206,7 @@ class MetaReferenceInferenceImpl(Inference, ModelsProtocolPrivate): stream=stream, logprobs=logprobs, ) - - model = resolve_model(request.model) - if model is None: - raise RuntimeError( - f"Unknown model: {request.model}, Run `llama model list`" - ) - elif model.descriptor() != self.model.descriptor(): - raise RuntimeError( - f"Model mismatch: {request.model} != {self.model.descriptor()}" - ) + self.check_model(request) if self.config.create_distributed_process_group: if SEMAPHORE.locked(): @@ -111,26 +215,17 @@ class MetaReferenceInferenceImpl(Inference, ModelsProtocolPrivate): if request.stream: return self._stream_chat_completion(request) else: - return self._nonstream_chat_completion(request) + return await self._nonstream_chat_completion(request) async def _nonstream_chat_completion( self, request: ChatCompletionRequest ) -> ChatCompletionResponse: def impl(): - messages = chat_completion_request_to_messages(request) - tokens = [] logprobs = [] stop_reason = None - for token_result in self.generator.chat_completion( - messages=messages, - temperature=request.sampling_params.temperature, - top_p=request.sampling_params.top_p, - max_gen_len=request.sampling_params.max_tokens, - logprobs=request.logprobs, - tool_prompt_format=request.tool_prompt_format, - ): + for token_result in self.generator.chat_completion(request): tokens.append(token_result.token) if token_result.text == "<|eot_id|>": @@ -170,8 +265,6 @@ class MetaReferenceInferenceImpl(Inference, ModelsProtocolPrivate): self, request: ChatCompletionRequest ) -> AsyncGenerator: def impl(): - messages = chat_completion_request_to_messages(request) - yield ChatCompletionResponseStreamChunk( event=ChatCompletionResponseEvent( event_type=ChatCompletionResponseEventType.start, @@ -184,14 +277,7 @@ class MetaReferenceInferenceImpl(Inference, ModelsProtocolPrivate): stop_reason = None ipython = False - for token_result in self.generator.chat_completion( - messages=messages, - temperature=request.sampling_params.temperature, - top_p=request.sampling_params.top_p, - max_gen_len=request.sampling_params.max_tokens, - 
logprobs=request.logprobs, - tool_prompt_format=request.tool_prompt_format, - ): + for token_result in self.generator.chat_completion(request): tokens.append(token_result.token) if not ipython and token_result.text.startswith("<|python_tag|>"): diff --git a/llama_stack/providers/impls/meta_reference/inference/model_parallel.py b/llama_stack/providers/impls/meta_reference/inference/model_parallel.py index e8f483f30..7e7831185 100644 --- a/llama_stack/providers/impls/meta_reference/inference/model_parallel.py +++ b/llama_stack/providers/impls/meta_reference/inference/model_parallel.py @@ -7,16 +7,17 @@ import os from copy import deepcopy from functools import partial -from typing import Generator, List, Optional +from typing import Any, Generator from llama_models.llama3.api.chat_format import ChatFormat -from llama_models.llama3.api.datatypes import Message, ToolPromptFormat from llama_models.llama3.api.tokenizer import Tokenizer from llama_models.sku_list import resolve_model +from llama_stack.apis.inference import ChatCompletionRequest, CompletionRequest + from .config import MetaReferenceInferenceConfig from .generation import Llama, model_checkpoint_dir -from .parallel_utils import InferenceArgs, ModelParallelProcessGroup +from .parallel_utils import ModelParallelProcessGroup class ModelRunner: @@ -24,15 +25,13 @@ class ModelRunner: self.llama = llama # the `task` object is the same that is sent to `ModelParallelProcessGroup.run_inference()` - def __call__(self, task: InferenceArgs): - return self.llama.chat_completion( - task.messages, - task.temperature, - task.top_p, - task.max_gen_len, - task.logprobs, - task.tool_prompt_format, - ) + def __call__(self, req: Any): + if isinstance(req, ChatCompletionRequest): + return self.llama.chat_completion(req) + elif isinstance(req, CompletionRequest): + return self.llama.completion(req) + else: + raise ValueError(f"Unexpected task type {type(req)}") def init_model_cb(config: MetaReferenceInferenceConfig): @@ -77,23 +76,18 @@ class LlamaModelParallelGenerator: def __exit__(self, exc_type, exc_value, exc_traceback): self.group.stop() - def chat_completion( + def completion( self, - messages: List[Message], - temperature: float = 0.6, - top_p: float = 0.9, - max_gen_len: Optional[int] = None, - logprobs: bool = False, - tool_prompt_format: ToolPromptFormat = ToolPromptFormat.json, + request: CompletionRequest, ) -> Generator: - req_obj = InferenceArgs( - messages=deepcopy(messages), - temperature=temperature, - top_p=top_p, - max_gen_len=max_gen_len, - logprobs=logprobs or False, - tool_prompt_format=tool_prompt_format, - ) - + req_obj = deepcopy(request) + gen = self.group.run_inference(req_obj) + yield from gen + + def chat_completion( + self, + request: ChatCompletionRequest, + ) -> Generator: + req_obj = deepcopy(request) gen = self.group.run_inference(req_obj) yield from gen diff --git a/llama_stack/providers/impls/meta_reference/inference/parallel_utils.py b/llama_stack/providers/impls/meta_reference/inference/parallel_utils.py index 7dbedd0f0..62eeefaac 100644 --- a/llama_stack/providers/impls/meta_reference/inference/parallel_utils.py +++ b/llama_stack/providers/impls/meta_reference/inference/parallel_utils.py @@ -4,6 +4,12 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. +# Copyright (c) Meta Platforms, IAny, nc. and affiliates. +# All rights reserved. 
+# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + import json import multiprocessing import os @@ -11,10 +17,9 @@ import tempfile import time import uuid from enum import Enum -from typing import Callable, Generator, List, Literal, Optional, Union +from typing import Callable, Generator, Literal, Optional, Union import torch - import zmq from fairscale.nn.model_parallel.initialize import ( @@ -23,25 +28,16 @@ from fairscale.nn.model_parallel.initialize import ( get_model_parallel_src_rank, ) -from llama_models.llama3.api.datatypes import Message, ToolPromptFormat - from pydantic import BaseModel, Field from torch.distributed.launcher.api import elastic_launch, LaunchConfig from typing_extensions import Annotated +from llama_stack.apis.inference import ChatCompletionRequest, CompletionRequest + from .generation import TokenResult -class InferenceArgs(BaseModel): - messages: List[Message] - temperature: float - top_p: float - max_gen_len: int - logprobs: bool - tool_prompt_format: ToolPromptFormat - - class ProcessingMessageName(str, Enum): ready_request = "ready_request" ready_response = "ready_response" @@ -80,7 +76,7 @@ class TaskRequest(BaseModel): type: Literal[ProcessingMessageName.task_request] = ( ProcessingMessageName.task_request ) - task: InferenceArgs + task: Union[CompletionRequest, ChatCompletionRequest] class TaskResponse(BaseModel): @@ -349,11 +345,13 @@ class ModelParallelProcessGroup: self.process.join() self.started = False - def run_inference(self, inference_args: InferenceArgs) -> Generator: + def run_inference( + self, req: Union[CompletionRequest, ChatCompletionRequest] + ) -> Generator: assert not self.running, "inference already running" self.running = True - self.request_socket.send(encode_msg(TaskRequest(task=inference_args))) + self.request_socket.send(encode_msg(TaskRequest(task=req))) try: while True: obj_json = self.request_socket.recv() diff --git a/llama_stack/providers/impls/meta_reference/safety/llama_guard.py b/llama_stack/providers/impls/meta_reference/safety/llama_guard.py index a6f450fae..99b1c29be 100644 --- a/llama_stack/providers/impls/meta_reference/safety/llama_guard.py +++ b/llama_stack/providers/impls/meta_reference/safety/llama_guard.py @@ -184,7 +184,7 @@ class LlamaGuardShield(ShieldBase): # TODO: llama-stack inference protocol has issues with non-streaming inference code content = "" - async for chunk in self.inference_api.chat_completion( + async for chunk in await self.inference_api.chat_completion( model=self.model, messages=[shield_input_message], stream=True, diff --git a/llama_stack/providers/impls/vllm/vllm.py b/llama_stack/providers/impls/vllm/vllm.py index 5cdb1a2ab..c977c738d 100644 --- a/llama_stack/providers/impls/vllm/vllm.py +++ b/llama_stack/providers/impls/vllm/vllm.py @@ -134,7 +134,7 @@ class VLLMInferenceImpl(ModelRegistryHelper, Inference): if self.engine: self.engine.shutdown_background_loop() - def completion( + async def completion( self, model: str, content: InterleavedTextMedia, @@ -152,7 +152,7 @@ class VLLMInferenceImpl(ModelRegistryHelper, Inference): logprobs=logprobs, ) - def chat_completion( + async def chat_completion( self, model: str, messages: list[Message], @@ -189,7 +189,7 @@ class VLLMInferenceImpl(ModelRegistryHelper, Inference): if stream: return self._stream_chat_completion(request, results_generator) else: - return self._nonstream_chat_completion(request, results_generator) + return await 
self._nonstream_chat_completion(request, results_generator) async def _nonstream_chat_completion( self, request: ChatCompletionRequest, results_generator: AsyncGenerator diff --git a/llama_stack/providers/tests/agents/test_agents.py b/llama_stack/providers/tests/agents/test_agents.py index 6774d3f1f..9c34c3a28 100644 --- a/llama_stack/providers/tests/agents/test_agents.py +++ b/llama_stack/providers/tests/agents/test_agents.py @@ -116,7 +116,7 @@ async def test_create_agent_turn(agents_settings, sample_messages): ) turn_response = [ - chunk async for chunk in agents_impl.create_agent_turn(**turn_request) + chunk async for chunk in await agents_impl.create_agent_turn(**turn_request) ] assert len(turn_response) > 0 @@ -204,7 +204,7 @@ async def test_rag_agent_as_attachments( ) turn_response = [ - chunk async for chunk in agents_impl.create_agent_turn(**turn_request) + chunk async for chunk in await agents_impl.create_agent_turn(**turn_request) ] assert len(turn_response) > 0 @@ -218,7 +218,7 @@ async def test_rag_agent_as_attachments( ) turn_response = [ - chunk async for chunk in agents_impl.create_agent_turn(**turn_request) + chunk async for chunk in await agents_impl.create_agent_turn(**turn_request) ] assert len(turn_response) > 0 @@ -270,7 +270,7 @@ async def test_create_agent_turn_with_brave_search( ) turn_response = [ - chunk async for chunk in agents_impl.create_agent_turn(**turn_request) + chunk async for chunk in await agents_impl.create_agent_turn(**turn_request) ] assert len(turn_response) > 0 diff --git a/llama_stack/providers/tests/inference/test_inference.py b/llama_stack/providers/tests/inference/test_inference.py index 581a0d428..09d6a69db 100644 --- a/llama_stack/providers/tests/inference/test_inference.py +++ b/llama_stack/providers/tests/inference/test_inference.py @@ -126,6 +126,45 @@ async def test_model_list(inference_settings): assert model_def.identifier == params["model"] +@pytest.mark.asyncio +async def test_completion(inference_settings): + inference_impl = inference_settings["impl"] + params = inference_settings["common_params"] + + provider = inference_impl.routing_table.get_provider_impl(params["model"]) + if provider.__provider_id__ != "meta-reference": + pytest.skip("Other inference providers don't support completion() yet") + + response = await inference_impl.completion( + content="Roses are red,", + stream=False, + model=params["model"], + sampling_params=SamplingParams( + max_tokens=50, + ), + ) + + assert isinstance(response, CompletionResponse) + assert "violets are blue" in response.content + + chunks = [ + r + async for r in await inference_impl.completion( + content="Roses are red,", + stream=True, + model=params["model"], + sampling_params=SamplingParams( + max_tokens=50, + ), + ) + ] + + assert all(isinstance(chunk, CompletionResponseStreamChunk) for chunk in chunks) + assert len(chunks) == 51 + last = chunks[-1] + assert last.stop_reason == StopReason.out_of_tokens + + @pytest.mark.asyncio async def test_chat_completion_non_streaming(inference_settings, sample_messages): inference_impl = inference_settings["impl"] @@ -146,7 +185,7 @@ async def test_chat_completion_streaming(inference_settings, sample_messages): inference_impl = inference_settings["impl"] response = [ r - async for r in inference_impl.chat_completion( + async for r in await inference_impl.chat_completion( messages=sample_messages, stream=True, **inference_settings["common_params"], @@ -217,7 +256,7 @@ async def test_chat_completion_with_tool_calling_streaming( response = [ r - async 
for r in inference_impl.chat_completion( + async for r in await inference_impl.chat_completion( messages=messages, tools=[sample_tool_definition], stream=True, From 8cfbb9d38b80ca5930e8ba20756cdaa51af30ca0 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sat, 19 Oct 2024 17:19:54 -0700 Subject: [PATCH 21/40] Improve an important error message --- .../distribution/routers/routing_tables.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/llama_stack/distribution/routers/routing_tables.py b/llama_stack/distribution/routers/routing_tables.py index ede30aea1..597dbed07 100644 --- a/llama_stack/distribution/routers/routing_tables.py +++ b/llama_stack/distribution/routers/routing_tables.py @@ -87,8 +87,21 @@ class CommonRoutingTableImpl(RoutingTable): def get_provider_impl( self, routing_key: str, provider_id: Optional[str] = None ) -> Any: + def apiname_object(): + if isinstance(self, ModelsRoutingTable): + return ("Inference", "model") + elif isinstance(self, ShieldsRoutingTable): + return ("Safety", "shield") + elif isinstance(self, MemoryBanksRoutingTable): + return ("Memory", "memory_bank") + else: + raise ValueError("Unknown routing table type") + if routing_key not in self.registry: - raise ValueError(f"`{routing_key}` not registered") + apiname, objname = apiname_object() + raise ValueError( + f"`{routing_key}` not registered. Make sure there is an {apiname} provider serving this {objname}." + ) objs = self.registry[routing_key] for obj in objs: From 59c43736e83c2b3b9726441a28bab09e4d92d52f Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Sat, 19 Oct 2024 17:26:18 -0700 Subject: [PATCH 22/40] update ollama for llama-guard3 --- llama_stack/providers/adapters/inference/ollama/ollama.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llama_stack/providers/adapters/inference/ollama/ollama.py b/llama_stack/providers/adapters/inference/ollama/ollama.py index 3a3e4b451..74aed6e5e 100644 --- a/llama_stack/providers/adapters/inference/ollama/ollama.py +++ b/llama_stack/providers/adapters/inference/ollama/ollama.py @@ -33,7 +33,8 @@ OLLAMA_SUPPORTED_MODELS = { "Llama3.1-70B-Instruct": "llama3.1:70b-instruct-fp16", "Llama3.2-1B-Instruct": "llama3.2:1b-instruct-fp16", "Llama3.2-3B-Instruct": "llama3.2:3b-instruct-fp16", - "Llama-Guard-3-8B": "xe/llamaguard3:latest", + "Llama-Guard-3-8B": "llama-guard3:8b", + "Llama-Guard-3-1B": "llama-guard3:1b", } From a27a2cd2af93d73e48d9789ac92c55927da6c44d Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Sun, 20 Oct 2024 21:43:25 -0400 Subject: [PATCH 23/40] Add vLLM inference provider for OpenAI compatible vLLM server (#178) This PR adds vLLM inference provider for OpenAI compatible vLLM server. 
--- .../build_configs/local-vllm-build.yaml | 2 +- .../templates/remote-vllm-build.yaml | 10 ++ .../adapters/inference/vllm/__init__.py | 15 ++ .../adapters/inference/vllm/config.py | 22 +++ .../providers/adapters/inference/vllm/vllm.py | 152 ++++++++++++++++++ llama_stack/providers/registry/inference.py | 9 ++ 6 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 llama_stack/distribution/templates/remote-vllm-build.yaml create mode 100644 llama_stack/providers/adapters/inference/vllm/__init__.py create mode 100644 llama_stack/providers/adapters/inference/vllm/config.py create mode 100644 llama_stack/providers/adapters/inference/vllm/vllm.py diff --git a/llama_stack/distribution/templates/build_configs/local-vllm-build.yaml b/llama_stack/distribution/templates/build_configs/local-vllm-build.yaml index e907cb7c9..e333a137b 100644 --- a/llama_stack/distribution/templates/build_configs/local-vllm-build.yaml +++ b/llama_stack/distribution/templates/build_configs/local-vllm-build.yaml @@ -7,4 +7,4 @@ distribution_spec: safety: meta-reference agents: meta-reference telemetry: meta-reference -image_type: conda +image_type: conda \ No newline at end of file diff --git a/llama_stack/distribution/templates/remote-vllm-build.yaml b/llama_stack/distribution/templates/remote-vllm-build.yaml new file mode 100644 index 000000000..525c3a930 --- /dev/null +++ b/llama_stack/distribution/templates/remote-vllm-build.yaml @@ -0,0 +1,10 @@ +name: remote-vllm +distribution_spec: + description: Use remote vLLM for running LLM inference + providers: + inference: remote::vllm + memory: meta-reference + safety: meta-reference + agents: meta-reference + telemetry: meta-reference +image_type: docker \ No newline at end of file diff --git a/llama_stack/providers/adapters/inference/vllm/__init__.py b/llama_stack/providers/adapters/inference/vllm/__init__.py new file mode 100644 index 000000000..f4588a307 --- /dev/null +++ b/llama_stack/providers/adapters/inference/vllm/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +from .config import VLLMImplConfig +from .vllm import VLLMInferenceAdapter + + +async def get_adapter_impl(config: VLLMImplConfig, _deps): + assert isinstance(config, VLLMImplConfig), f"Unexpected config type: {type(config)}" + impl = VLLMInferenceAdapter(config) + await impl.initialize() + return impl diff --git a/llama_stack/providers/adapters/inference/vllm/config.py b/llama_stack/providers/adapters/inference/vllm/config.py new file mode 100644 index 000000000..65815922c --- /dev/null +++ b/llama_stack/providers/adapters/inference/vllm/config.py @@ -0,0 +1,22 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from typing import Optional + +from llama_models.schema_utils import json_schema_type +from pydantic import BaseModel, Field + + +@json_schema_type +class VLLMImplConfig(BaseModel): + url: Optional[str] = Field( + default=None, + description="The URL for the vLLM model serving endpoint", + ) + api_token: Optional[str] = Field( + default=None, + description="The API token", + ) diff --git a/llama_stack/providers/adapters/inference/vllm/vllm.py b/llama_stack/providers/adapters/inference/vllm/vllm.py new file mode 100644 index 000000000..a5934928a --- /dev/null +++ b/llama_stack/providers/adapters/inference/vllm/vllm.py @@ -0,0 +1,152 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. +from typing import AsyncGenerator + +from llama_models.llama3.api.chat_format import ChatFormat +from llama_models.llama3.api.datatypes import Message +from llama_models.llama3.api.tokenizer import Tokenizer + +from openai import OpenAI + +from llama_stack.apis.inference import * # noqa: F403 +from llama_stack.providers.datatypes import ModelsProtocolPrivate + +from llama_stack.providers.utils.inference.openai_compat import ( + get_sampling_options, + process_chat_completion_response, + process_chat_completion_stream_response, +) +from llama_stack.providers.utils.inference.prompt_adapter import ( + chat_completion_request_to_prompt, +) + +from .config import VLLMImplConfig + +VLLM_SUPPORTED_MODELS = { + "Llama3.1-8B": "meta-llama/Llama-3.1-8B", + "Llama3.1-70B": "meta-llama/Llama-3.1-70B", + "Llama3.1-405B:bf16-mp8": "meta-llama/Llama-3.1-405B", + "Llama3.1-405B": "meta-llama/Llama-3.1-405B-FP8", + "Llama3.1-405B:bf16-mp16": "meta-llama/Llama-3.1-405B", + "Llama3.1-8B-Instruct": "meta-llama/Llama-3.1-8B-Instruct", + "Llama3.1-70B-Instruct": "meta-llama/Llama-3.1-70B-Instruct", + "Llama3.1-405B-Instruct:bf16-mp8": "meta-llama/Llama-3.1-405B-Instruct", + "Llama3.1-405B-Instruct": "meta-llama/Llama-3.1-405B-Instruct-FP8", + "Llama3.1-405B-Instruct:bf16-mp16": "meta-llama/Llama-3.1-405B-Instruct", + "Llama3.2-1B": "meta-llama/Llama-3.2-1B", + "Llama3.2-3B": "meta-llama/Llama-3.2-3B", + "Llama3.2-11B-Vision": "meta-llama/Llama-3.2-11B-Vision", + "Llama3.2-90B-Vision": "meta-llama/Llama-3.2-90B-Vision", + "Llama3.2-1B-Instruct": "meta-llama/Llama-3.2-1B-Instruct", + "Llama3.2-3B-Instruct": "meta-llama/Llama-3.2-3B-Instruct", + "Llama3.2-11B-Vision-Instruct": "meta-llama/Llama-3.2-11B-Vision-Instruct", + "Llama3.2-90B-Vision-Instruct": "meta-llama/Llama-3.2-90B-Vision-Instruct", + "Llama-Guard-3-11B-Vision": "meta-llama/Llama-Guard-3-11B-Vision", + "Llama-Guard-3-1B:int4-mp1": "meta-llama/Llama-Guard-3-1B-INT4", + "Llama-Guard-3-1B": "meta-llama/Llama-Guard-3-1B", + "Llama-Guard-3-8B": "meta-llama/Llama-Guard-3-8B", + "Llama-Guard-3-8B:int8-mp1": "meta-llama/Llama-Guard-3-8B-INT8", + "Prompt-Guard-86M": "meta-llama/Prompt-Guard-86M", + "Llama-Guard-2-8B": "meta-llama/Llama-Guard-2-8B", +} + + +class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): + def __init__(self, config: VLLMImplConfig) -> None: + self.config = config + self.formatter = ChatFormat(Tokenizer.get_instance()) + self.client = None + + async def initialize(self) -> None: + self.client = OpenAI(base_url=self.config.url, api_key=self.config.api_token) + + async def register_model(self, model: ModelDef) -> None: + raise ValueError("Model registration is not supported for vLLM models") + + 
async def shutdown(self) -> None: + pass + + async def list_models(self) -> List[ModelDef]: + return [ + ModelDef(identifier=model.id, llama_model=model.id) + for model in self.client.models.list() + ] + + def completion( + self, + model: str, + content: InterleavedTextMedia, + sampling_params: Optional[SamplingParams] = SamplingParams(), + stream: Optional[bool] = False, + logprobs: Optional[LogProbConfig] = None, + ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: + raise NotImplementedError() + + def chat_completion( + self, + model: str, + messages: List[Message], + sampling_params: Optional[SamplingParams] = SamplingParams(), + tools: Optional[List[ToolDefinition]] = None, + tool_choice: Optional[ToolChoice] = ToolChoice.auto, + tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json, + stream: Optional[bool] = False, + logprobs: Optional[LogProbConfig] = None, + ) -> AsyncGenerator: + request = ChatCompletionRequest( + model=model, + messages=messages, + sampling_params=sampling_params, + tools=tools or [], + tool_choice=tool_choice, + tool_prompt_format=tool_prompt_format, + stream=stream, + logprobs=logprobs, + ) + if stream: + return self._stream_chat_completion(request, self.client) + else: + return self._nonstream_chat_completion(request, self.client) + + async def _nonstream_chat_completion( + self, request: ChatCompletionRequest, client: OpenAI + ) -> ChatCompletionResponse: + params = self._get_params(request) + r = client.completions.create(**params) + return process_chat_completion_response(request, r, self.formatter) + + async def _stream_chat_completion( + self, request: ChatCompletionRequest, client: OpenAI + ) -> AsyncGenerator: + params = self._get_params(request) + + # TODO: Can we use client.completions.acreate() or maybe there is another way to directly create an async + # generator so this wrapper is not necessary? 
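+        # Note: with "stream": True in params, client.completions.create() returns a
+        # synchronous iterator of chunks; the wrapper below simply re-yields those
+        # chunks from an async generator so that
+        # process_chat_completion_stream_response can consume them.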
+ async def _to_async_generator(): + s = client.completions.create(**params) + for chunk in s: + yield chunk + + stream = _to_async_generator() + async for chunk in process_chat_completion_stream_response( + request, stream, self.formatter + ): + yield chunk + + def _get_params(self, request: ChatCompletionRequest) -> dict: + return { + "model": VLLM_SUPPORTED_MODELS[request.model], + "prompt": chat_completion_request_to_prompt(request, self.formatter), + "stream": request.stream, + **get_sampling_options(request), + } + + async def embeddings( + self, + model: str, + contents: List[InterleavedTextMedia], + ) -> EmbeddingsResponse: + raise NotImplementedError() diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index 686fc273b..c3370bfd9 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -60,6 +60,15 @@ def available_providers() -> List[ProviderSpec]: module="llama_stack.providers.adapters.inference.ollama", ), ), +# remote_provider_spec( +# api=Api.inference, +# adapter=AdapterSpec( +# adapter_type="vllm", +# pip_packages=["openai"], +# module="llama_stack.providers.adapters.inference.vllm", +# config_class="llama_stack.providers.adapters.inference.vllm.VLLMImplConfig", +# ), +# ), remote_provider_spec( api=Api.inference, adapter=AdapterSpec( From cae5b0708b161658646a1971ab88ecdaa18ad488 Mon Sep 17 00:00:00 2001 From: raghotham Date: Mon, 21 Oct 2024 11:48:19 +0530 Subject: [PATCH 24/40] Create .readthedocs.yaml Trying out readthedocs --- .readthedocs.yaml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 .readthedocs.yaml diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 000000000..f89fc906d --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,32 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.12" + # You can also specify other tool versions: + # nodejs: "19" + # rust: "1.64" + # golang: "1.19" + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Optionally build your docs in additional formats such as PDF and ePub +# formats: +# - pdf +# - epub + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +# python: +# install: +# - requirements: docs/requirements.txt From c995219731ec6961e03e096b5829d6b7a38980d7 Mon Sep 17 00:00:00 2001 From: nehal-a2z Date: Mon, 21 Oct 2024 23:16:53 +0530 Subject: [PATCH 25/40] Update event_logger.py (#275) spelling error --- llama_stack/apis/agents/event_logger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/apis/agents/event_logger.py b/llama_stack/apis/agents/event_logger.py index b5ad6ae91..25931b821 100644 --- a/llama_stack/apis/agents/event_logger.py +++ b/llama_stack/apis/agents/event_logger.py @@ -180,5 +180,5 @@ class EventLogger: color="cyan", ) - preivous_event_type = event_type + previous_event_type = event_type previous_step_type = step_type From 23210e867935cc7136162bbb82522e201946ded7 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Mon, 21 Oct 2024 11:17:53 -0700 Subject: [PATCH 26/40] llama stack distributions / templates / docker refactor 
(#266) * docker compose ollama * comment * update compose file * readme for distributions * readme * move distribution folders * move distribution/templates to distributions/ * rename * kill distribution/templates * readme * readme * build/developer cookbook/new api provider * developer cookbook * readme * readme * [bugfix] fix case for agent when memory bank registered without specifying provider_id (#264) * fix case where memory bank is registered without provider_id * memory test * agents unit test * Add an option to not use elastic agents for meta-reference inference (#269) * Allow overridding checkpoint_dir via config * Small rename * Make all methods `async def` again; add completion() for meta-reference (#270) PR #201 had made several changes while trying to fix issues with getting the stream=False branches of inference and agents API working. As part of this, it made a change which was slightly gratuitous. Namely, making chat_completion() and brethren "def" instead of "async def". The rationale was that this allowed the user (within llama-stack) of this to use it as: ``` async for chunk in api.chat_completion(params) ``` However, it causes unnecessary confusion for several folks. Given that clients (e.g., llama-stack-apps) anyway use the SDK methods (which are completely isolated) this choice was not ideal. Let's revert back so the call now looks like: ``` async for chunk in await api.chat_completion(params) ``` Bonus: Added a completion() implementation for the meta-reference provider. Technically should have been another PR :) * Improve an important error message * update ollama for llama-guard3 * Add vLLM inference provider for OpenAI compatible vLLM server (#178) This PR adds vLLM inference provider for OpenAI compatible vLLM server. * Create .readthedocs.yaml Trying out readthedocs * Update event_logger.py (#275) spelling error * vllm * build templates * delete templates * tmp add back build to avoid merge conflicts * vllm * vllm --------- Co-authored-by: Ashwin Bharambe Co-authored-by: Ashwin Bharambe Co-authored-by: Yuan Tang Co-authored-by: raghotham Co-authored-by: nehal-a2z --- distributions/README.md | 11 + .../bedrock/build.yaml | 2 +- .../databricks/build.yaml | 4 +- .../fireworks/build.yaml | 2 +- .../hf-endpoint/build.yaml | 2 +- .../hf-serverless/build.yaml | 2 +- distributions/meta-reference-gpu/README.md | 33 +++ .../meta-reference-gpu/build.yaml | 7 +- .../meta-reference-gpu/run.yaml | 10 +- distributions/ollama/README.md | 91 ++++++ distributions/ollama/build.yaml | 13 + distributions/ollama/cpu/compose.yaml | 30 ++ distributions/ollama/cpu/run.yaml | 46 +++ distributions/ollama/gpu/compose.yaml | 48 ++++ distributions/ollama/gpu/run.yaml | 46 +++ distributions/tgi/README.md | 94 ++++++ .../tgi/build.yaml | 9 +- distributions/tgi/cpu/compose.yaml | 54 ++++ .../tgi/cpu/run.yaml | 10 +- .../tgi/gpu}/compose.yaml | 8 +- .../tgi/gpu/run.yaml | 0 .../together/build.yaml | 2 +- .../vllm/build.yaml | 8 +- docs/building_distro.md | 270 ++++++++++++++++++ docs/developer_cookbook.md | 41 +++ docs/getting_started.md | 248 +--------------- docs/new_api_provider.md | 20 ++ llama_stack/cli/stack/build.py | 2 +- llama_stack/distribution/docker/README.md | 28 -- .../build_configs/local-cpu-docker-build.yaml | 15 - .../local-tgi-chroma-docker-build.yaml | 11 - llama_stack/providers/registry/inference.py | 18 +- 32 files changed, 850 insertions(+), 335 deletions(-) create mode 100644 distributions/README.md rename 
llama_stack/distribution/templates/build_configs/local-bedrock-conda-example-build.yaml => distributions/bedrock/build.yaml (87%) rename llama_stack/distribution/templates/build_configs/local-databricks-build.yaml => distributions/databricks/build.yaml (85%) rename llama_stack/distribution/templates/build_configs/local-fireworks-build.yaml => distributions/fireworks/build.yaml (91%) rename llama_stack/distribution/templates/build_configs/local-hf-endpoint-build.yaml => distributions/hf-endpoint/build.yaml (93%) rename llama_stack/distribution/templates/build_configs/local-hf-serverless-build.yaml => distributions/hf-serverless/build.yaml (92%) create mode 100644 distributions/meta-reference-gpu/README.md rename llama_stack/distribution/templates/build_configs/local-gpu-docker-build.yaml => distributions/meta-reference-gpu/build.yaml (67%) rename llama_stack/distribution/templates/run_configs/local-run.yaml => distributions/meta-reference-gpu/run.yaml (85%) create mode 100644 distributions/ollama/README.md create mode 100644 distributions/ollama/build.yaml create mode 100644 distributions/ollama/cpu/compose.yaml create mode 100644 distributions/ollama/cpu/run.yaml create mode 100644 distributions/ollama/gpu/compose.yaml create mode 100644 distributions/ollama/gpu/run.yaml create mode 100644 distributions/tgi/README.md rename llama_stack/distribution/templates/build_configs/local-tgi-build.yaml => distributions/tgi/build.yaml (51%) create mode 100644 distributions/tgi/cpu/compose.yaml rename llama_stack/distribution/templates/run_configs/local-tgi-run.yaml => distributions/tgi/cpu/run.yaml (83%) rename {llama_stack/distribution/docker/tgi => distributions/tgi/gpu}/compose.yaml (90%) rename llama_stack/distribution/docker/tgi/tgi-run.yaml => distributions/tgi/gpu/run.yaml (100%) rename llama_stack/distribution/templates/build_configs/local-together-build.yaml => distributions/together/build.yaml (92%) rename llama_stack/distribution/templates/build_configs/local-ollama-build.yaml => distributions/vllm/build.yaml (51%) create mode 100644 docs/building_distro.md create mode 100644 docs/developer_cookbook.md create mode 100644 docs/new_api_provider.md delete mode 100644 llama_stack/distribution/docker/README.md delete mode 100644 llama_stack/distribution/templates/build_configs/local-cpu-docker-build.yaml delete mode 100644 llama_stack/distribution/templates/build_configs/local-tgi-chroma-docker-build.yaml diff --git a/distributions/README.md b/distributions/README.md new file mode 100644 index 000000000..92640210b --- /dev/null +++ b/distributions/README.md @@ -0,0 +1,11 @@ +# Llama Stack Distribution + +A Distribution is where APIs and Providers are assembled together to provide a consistent whole to the end application developer. You can mix-and-match providers -- some could be backed by local code and some could be remote. As a hobbyist, you can serve a small model locally, but can choose a cloud provider for a large model. Regardless, the higher level APIs your app needs to work with don't need to change at all. You can even imagine moving across the server / mobile-device boundary as well always using the same uniform set of APIs for developing Generative AI applications. 
+ + +## Quick Start Llama Stack Distributions Guide +| **Distribution** | **Llama Stack Docker** | Start This Distribution | **Inference** | **Agents** | **Memory** | **Safety** | **Telemetry** | +|:----------------: |:------------------------------------------: |:-----------------------: |:------------------: |:------------------: |:------------------: |:------------------: |:------------------: | +| Meta Reference | llamastack/distribution-meta-reference-gpu | [Guide](./meta-reference-gpu/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| Ollama | llamastack/distribution-ollama | [Guide](./ollama/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| TGI | llamastack/distribution-tgi | [Guide](./tgi/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | diff --git a/llama_stack/distribution/templates/build_configs/local-bedrock-conda-example-build.yaml b/distributions/bedrock/build.yaml similarity index 87% rename from llama_stack/distribution/templates/build_configs/local-bedrock-conda-example-build.yaml rename to distributions/bedrock/build.yaml index 50d5e7048..ae7b27d49 100644 --- a/llama_stack/distribution/templates/build_configs/local-bedrock-conda-example-build.yaml +++ b/distributions/bedrock/build.yaml @@ -1,4 +1,4 @@ -name: local-bedrock-conda-example +name: bedrock distribution_spec: description: Use Amazon Bedrock APIs. providers: diff --git a/llama_stack/distribution/templates/build_configs/local-databricks-build.yaml b/distributions/databricks/build.yaml similarity index 85% rename from llama_stack/distribution/templates/build_configs/local-databricks-build.yaml rename to distributions/databricks/build.yaml index 754af7668..2188dd0a0 100644 --- a/llama_stack/distribution/templates/build_configs/local-databricks-build.yaml +++ b/distributions/databricks/build.yaml @@ -1,4 +1,4 @@ -name: local-databricks +name: databricks distribution_spec: description: Use Databricks for running LLM inference providers: @@ -7,4 +7,4 @@ distribution_spec: safety: meta-reference agents: meta-reference telemetry: meta-reference -image_type: conda \ No newline at end of file +image_type: conda diff --git a/llama_stack/distribution/templates/build_configs/local-fireworks-build.yaml b/distributions/fireworks/build.yaml similarity index 91% rename from llama_stack/distribution/templates/build_configs/local-fireworks-build.yaml rename to distributions/fireworks/build.yaml index 33bdee3b5..831643ff1 100644 --- a/llama_stack/distribution/templates/build_configs/local-fireworks-build.yaml +++ b/distributions/fireworks/build.yaml @@ -1,4 +1,4 @@ -name: local-fireworks +name: fireworks distribution_spec: description: Use Fireworks.ai for running LLM inference providers: diff --git a/llama_stack/distribution/templates/build_configs/local-hf-endpoint-build.yaml b/distributions/hf-endpoint/build.yaml similarity index 93% rename from llama_stack/distribution/templates/build_configs/local-hf-endpoint-build.yaml rename to distributions/hf-endpoint/build.yaml index e5c4ae8cc..750bebcb5 100644 --- a/llama_stack/distribution/templates/build_configs/local-hf-endpoint-build.yaml +++ b/distributions/hf-endpoint/build.yaml @@ -1,4 +1,4 @@ -name: local-hf-endpoint +name: hf-endpoint distribution_spec: description: "Like local, but use Hugging Face Inference Endpoints for running LLM inference.\nSee https://hf.co/docs/api-endpoints." 
   providers:
diff --git a/llama_stack/distribution/templates/build_configs/local-hf-serverless-build.yaml b/distributions/hf-serverless/build.yaml
similarity index 92%
rename from llama_stack/distribution/templates/build_configs/local-hf-serverless-build.yaml
rename to distributions/hf-serverless/build.yaml
index 752390b40..f6da3ad4d 100644
--- a/llama_stack/distribution/templates/build_configs/local-hf-serverless-build.yaml
+++ b/distributions/hf-serverless/build.yaml
@@ -1,4 +1,4 @@
-name: local-hf-serverless
+name: hf-serverless
 distribution_spec:
   description: "Like local, but use Hugging Face Inference API (serverless) for running LLM inference.\nSee https://hf.co/docs/api-inference."
   providers:
diff --git a/distributions/meta-reference-gpu/README.md b/distributions/meta-reference-gpu/README.md
new file mode 100644
index 000000000..951120da5
--- /dev/null
+++ b/distributions/meta-reference-gpu/README.md
@@ -0,0 +1,33 @@
+# Meta Reference Distribution
+
+The `llamastack/distribution-meta-reference-gpu` distribution consists of the following provider configurations.
+
+
+| **API** | **Inference** | **Agents** | **Memory** | **Safety** | **Telemetry** |
+|----------------- |--------------- |---------------- |-------------------------------------------------- |---------------- |---------------- |
+| **Provider(s)** | meta-reference | meta-reference | meta-reference, remote::pgvector, remote::chroma | meta-reference | meta-reference |
+
+
+### Start the Distribution (Single Node GPU)
+
+> [!NOTE]
+> This assumes you have access to a GPU, since the meta-reference provider runs inference directly on your GPU.
+
+> [!NOTE]
+> For GPU inference, you need to set the following environment variable to point to the local directory containing your model checkpoints, and enable GPU access when starting the docker container.
+```
+export LLAMA_CHECKPOINT_DIR=~/.llama
+```
+
+> [!NOTE]
+> `~/.llama` should be the path containing downloaded weights of Llama models.
+
+
+To download and start running a pre-built docker container, you may use the following command:
+
+```
+docker run -it -p 5000:5000 -v ~/.llama:/root/.llama --gpus=all llamastack/llamastack-local-gpu
+```
+
+### Alternative (Build and start distribution locally via conda)
+- You may check out the [Getting Started](../../docs/getting_started.md) guide for more details on starting up a meta-reference distribution.
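Once the container above is running, a quick way to sanity-check it is to POST directly to the `/inference/chat_completion` route that the server advertises on startup. The snippet below is only a rough sketch: it assumes the server is reachable on `localhost:5000`, that the `requests` package is installed, and that the request body mirrors the `ChatCompletionRequest` fields used elsewhere in this patch (`model`, `messages`, `stream`); the exact wire format is defined by the Inference API, so adjust field names as needed.

```
import requests

# Illustrative smoke test against a locally running distribution (assumed to be on port 5000).
response = requests.post(
    "http://localhost:5000/inference/chat_completion",
    json={
        "model": "Llama3.1-8B-Instruct",  # a model served by this distribution
        "messages": [{"role": "user", "content": "Write a two-sentence poem about the moon."}],
        "stream": False,  # request a single, non-streamed response
    },
    timeout=60,
)
response.raise_for_status()
print(response.json())
```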
diff --git a/llama_stack/distribution/templates/build_configs/local-gpu-docker-build.yaml b/distributions/meta-reference-gpu/build.yaml similarity index 67% rename from llama_stack/distribution/templates/build_configs/local-gpu-docker-build.yaml rename to distributions/meta-reference-gpu/build.yaml index 01af1021e..ca786c51c 100644 --- a/llama_stack/distribution/templates/build_configs/local-gpu-docker-build.yaml +++ b/distributions/meta-reference-gpu/build.yaml @@ -1,9 +1,12 @@ -name: local-gpu +name: distribution-meta-reference-gpu distribution_spec: description: Use code from `llama_stack` itself to serve all llama stack APIs providers: inference: meta-reference - memory: meta-reference + memory: + - meta-reference + - remote::chromadb + - remote::pgvector safety: meta-reference agents: meta-reference telemetry: meta-reference diff --git a/llama_stack/distribution/templates/run_configs/local-run.yaml b/distributions/meta-reference-gpu/run.yaml similarity index 85% rename from llama_stack/distribution/templates/run_configs/local-run.yaml rename to distributions/meta-reference-gpu/run.yaml index 7abf2b4dc..724ca030a 100644 --- a/llama_stack/distribution/templates/run_configs/local-run.yaml +++ b/distributions/meta-reference-gpu/run.yaml @@ -13,7 +13,7 @@ apis: - safety providers: inference: - - provider_id: meta-reference + - provider_id: meta0 provider_type: meta-reference config: model: Llama3.1-8B-Instruct @@ -22,7 +22,7 @@ providers: max_seq_len: 4096 max_batch_size: 1 safety: - - provider_id: meta-reference + - provider_id: meta0 provider_type: meta-reference config: llama_guard_shield: @@ -33,11 +33,11 @@ providers: prompt_guard_shield: model: Prompt-Guard-86M memory: - - provider_id: meta-reference + - provider_id: meta0 provider_type: meta-reference config: {} agents: - - provider_id: meta-reference + - provider_id: meta0 provider_type: meta-reference config: persistence_store: @@ -45,6 +45,6 @@ providers: type: sqlite db_path: ~/.llama/runtime/kvstore.db telemetry: - - provider_id: meta-reference + - provider_id: meta0 provider_type: meta-reference config: {} diff --git a/distributions/ollama/README.md b/distributions/ollama/README.md new file mode 100644 index 000000000..43c764cbe --- /dev/null +++ b/distributions/ollama/README.md @@ -0,0 +1,91 @@ +# Ollama Distribution + +The `llamastack/distribution-ollama` distribution consists of the following provider configurations. + +| **API** | **Inference** | **Agents** | **Memory** | **Safety** | **Telemetry** | +|----------------- |---------------- |---------------- |---------------------------------- |---------------- |---------------- | +| **Provider(s)** | remote::ollama | meta-reference | remote::pgvector, remote::chroma | remote::ollama | meta-reference | + + +### Start a Distribution (Single Node GPU) + +> [!NOTE] +> This assumes you have access to GPU to start a Ollama server with access to your GPU. + +``` +$ cd llama-stack/distribution/ollama/gpu +$ ls +compose.yaml run.yaml +$ docker compose up +``` + +You will see outputs similar to following --- +``` +[ollama] | [GIN] 2024/10/18 - 21:19:41 | 200 | 226.841µs | ::1 | GET "/api/ps" +[ollama] | [GIN] 2024/10/18 - 21:19:42 | 200 | 60.908µs | ::1 | GET "/api/ps" +INFO: Started server process [1] +INFO: Waiting for application startup. +INFO: Application startup complete. 
+INFO: Uvicorn running on http://[::]:5000 (Press CTRL+C to quit) +[llamastack] | Resolved 12 providers +[llamastack] | inner-inference => ollama0 +[llamastack] | models => __routing_table__ +[llamastack] | inference => __autorouted__ +``` + +To kill the server +``` +docker compose down +``` + +### Start the Distribution (Single Node CPU) + +> [!NOTE] +> This will start an ollama server with CPU only, please see [Ollama Documentations](https://github.com/ollama/ollama) for serving models on CPU only. + +``` +$ cd llama-stack/distribution/ollama/cpu +$ ls +compose.yaml run.yaml +$ docker compose up +``` + +### (Alternative) ollama run + llama stack Run + +If you wish to separately spin up a Ollama server, and connect with Llama Stack, you may use the following commands. + +#### Start Ollama server. +- Please check the [Ollama Documentations](https://github.com/ollama/ollama) for more details. + +**Via Docker** +``` +docker run -d -v ollama:/root/.ollama -p 11434:11434 --name ollama ollama/ollama +``` + +**Via CLI** +``` +ollama run +``` + +#### Start Llama Stack server pointing to Ollama server + +**Via Docker** +``` +docker run --network host -it -p 5000:5000 -v ~/.llama:/root/.llama -v ./ollama-run.yaml:/root/llamastack-run-ollama.yaml --gpus=all llamastack-local-cpu --yaml_config /root/llamastack-run-ollama.yaml +``` + +Make sure in you `ollama-run.yaml` file, you inference provider is pointing to the correct Ollama endpoint. E.g. +``` +inference: + - provider_id: ollama0 + provider_type: remote::ollama + config: + url: http://127.0.0.1:14343 +``` + +**Via Conda** + +``` +llama stack build --config ./build.yaml +llama stack run ./gpu/run.yaml +``` diff --git a/distributions/ollama/build.yaml b/distributions/ollama/build.yaml new file mode 100644 index 000000000..d14091814 --- /dev/null +++ b/distributions/ollama/build.yaml @@ -0,0 +1,13 @@ +name: distribution-ollama +distribution_spec: + description: Use ollama for running LLM inference + providers: + inference: remote::ollama + memory: + - meta-reference + - remote::chromadb + - remote::pgvector + safety: meta-reference + agents: meta-reference + telemetry: meta-reference +image_type: conda diff --git a/distributions/ollama/cpu/compose.yaml b/distributions/ollama/cpu/compose.yaml new file mode 100644 index 000000000..841b0b88c --- /dev/null +++ b/distributions/ollama/cpu/compose.yaml @@ -0,0 +1,30 @@ +services: + ollama: + image: ollama/ollama:latest + network_mode: "host" + volumes: + - ollama:/root/.ollama # this solution synchronizes with the docker volume and loads the model rocket fast + ports: + - "11434:11434" + command: [] + llamastack: + depends_on: + - ollama + image: llamastack/llamastack-local-cpu + network_mode: "host" + volumes: + - ~/.llama:/root/.llama + # Link to ollama run.yaml file + - ./run.yaml:/root/my-run.yaml + ports: + - "5000:5000" + # Hack: wait for ollama server to start before starting docker + entrypoint: bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/my-run.yaml" + deploy: + restart_policy: + condition: on-failure + delay: 3s + max_attempts: 5 + window: 60s +volumes: + ollama: diff --git a/distributions/ollama/cpu/run.yaml b/distributions/ollama/cpu/run.yaml new file mode 100644 index 000000000..798dabc0b --- /dev/null +++ b/distributions/ollama/cpu/run.yaml @@ -0,0 +1,46 @@ +version: '2' +built_at: '2024-10-08T17:40:45.325529' +image_name: local +docker_image: null +conda_env: local +apis: +- shields +- agents +- models +- memory +- memory_banks +- inference +- 
safety +providers: + inference: + - provider_id: ollama0 + provider_type: remote::ollama + config: + url: http://127.0.0.1:14343 + safety: + - provider_id: meta0 + provider_type: meta-reference + config: + llama_guard_shield: + model: Llama-Guard-3-1B + excluded_categories: [] + disable_input_check: false + disable_output_check: false + prompt_guard_shield: + model: Prompt-Guard-86M + memory: + - provider_id: meta0 + provider_type: meta-reference + config: {} + agents: + - provider_id: meta0 + provider_type: meta-reference + config: + persistence_store: + namespace: null + type: sqlite + db_path: ~/.llama/runtime/kvstore.db + telemetry: + - provider_id: meta0 + provider_type: meta-reference + config: {} diff --git a/distributions/ollama/gpu/compose.yaml b/distributions/ollama/gpu/compose.yaml new file mode 100644 index 000000000..2e3f85e45 --- /dev/null +++ b/distributions/ollama/gpu/compose.yaml @@ -0,0 +1,48 @@ +services: + ollama: + image: ollama/ollama:latest + network_mode: "host" + volumes: + - ollama:/root/.ollama # this solution synchronizes with the docker volume and loads the model rocket fast + ports: + - "11434:11434" + devices: + - nvidia.com/gpu=all + environment: + - CUDA_VISIBLE_DEVICES=0 + command: [] + deploy: + resources: + reservations: + devices: + - driver: nvidia + # that's the closest analogue to --gpus; provide + # an integer amount of devices or 'all' + count: 1 + # Devices are reserved using a list of capabilities, making + # capabilities the only required field. A device MUST + # satisfy all the requested capabilities for a successful + # reservation. + capabilities: [gpu] + runtime: nvidia + llamastack-local-cpu: + depends_on: + - ollama + image: llamastack/llamastack-local-cpu + network_mode: "host" + volumes: + - ~/.llama:/root/.llama + # Link to ollama run.yaml file + - ./ollama-run.yaml:/root/llamastack-run-ollama.yaml + ports: + - "5000:5000" + # Hack: wait for ollama server to start before starting docker + entrypoint: bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/llamastack-run-ollama.yaml" + deploy: + restart_policy: + condition: on-failure + delay: 3s + max_attempts: 5 + window: 60s +volumes: + ollama: diff --git a/distributions/ollama/gpu/run.yaml b/distributions/ollama/gpu/run.yaml new file mode 100644 index 000000000..798dabc0b --- /dev/null +++ b/distributions/ollama/gpu/run.yaml @@ -0,0 +1,46 @@ +version: '2' +built_at: '2024-10-08T17:40:45.325529' +image_name: local +docker_image: null +conda_env: local +apis: +- shields +- agents +- models +- memory +- memory_banks +- inference +- safety +providers: + inference: + - provider_id: ollama0 + provider_type: remote::ollama + config: + url: http://127.0.0.1:14343 + safety: + - provider_id: meta0 + provider_type: meta-reference + config: + llama_guard_shield: + model: Llama-Guard-3-1B + excluded_categories: [] + disable_input_check: false + disable_output_check: false + prompt_guard_shield: + model: Prompt-Guard-86M + memory: + - provider_id: meta0 + provider_type: meta-reference + config: {} + agents: + - provider_id: meta0 + provider_type: meta-reference + config: + persistence_store: + namespace: null + type: sqlite + db_path: ~/.llama/runtime/kvstore.db + telemetry: + - provider_id: meta0 + provider_type: meta-reference + config: {} diff --git a/distributions/tgi/README.md b/distributions/tgi/README.md new file mode 100644 index 000000000..86d2636d7 --- /dev/null +++ b/distributions/tgi/README.md @@ -0,0 +1,94 @@ +# TGI Distribution + +The 
`llamastack/distribution-tgi` distribution consists of the following provider configurations.
+
+
+| **API** | **Inference** | **Agents** | **Memory** | **Safety** | **Telemetry** |
+|----------------- |--------------- |---------------- |-------------------------------------------------- |---------------- |---------------- |
+| **Provider(s)** | remote::tgi | meta-reference | meta-reference, remote::pgvector, remote::chroma | meta-reference | meta-reference |
+
+
+### Start the Distribution (Single Node GPU)
+
+> [!NOTE]
+> This assumes you have access to a GPU to start a TGI server with access to your GPU.
+
+
+```
+$ cd llama-stack/distributions/tgi/gpu
+$ ls
+compose.yaml run.yaml
+$ docker compose up
+```
+
+The script will first start up the TGI server, then start up the Llama Stack distribution server, hooking up to the remote TGI provider for inference. You should be able to see the following outputs --
+```
+[text-generation-inference] | 2024-10-15T18:56:33.810397Z INFO text_generation_router::server: router/src/server.rs:1813: Using config Some(Llama)
+[text-generation-inference] | 2024-10-15T18:56:33.810448Z WARN text_generation_router::server: router/src/server.rs:1960: Invalid hostname, defaulting to 0.0.0.0
+[text-generation-inference] | 2024-10-15T18:56:33.864143Z INFO text_generation_router::server: router/src/server.rs:2353: Connected
+INFO: Started server process [1]
+INFO: Waiting for application startup.
+INFO: Application startup complete.
+INFO: Uvicorn running on http://[::]:5000 (Press CTRL+C to quit)
+```
+
+To kill the server
+```
+docker compose down
+```
+
+### Start the Distribution (Single Node CPU)
+
+> [!NOTE]
+> This assumes you have a hosted endpoint compatible with the TGI server.
+
+```
+$ cd llama-stack/distributions/tgi/cpu
+$ ls
+compose.yaml run.yaml
+$ docker compose up
+```
+
+In the `run.yaml` file, set the inference provider `url` to your TGI endpoint.
+```
+inference:
+  - provider_id: tgi0
+    provider_type: remote::tgi
+    config:
+      url:
+```
+
+### (Alternative) TGI server + llama stack run (Single Node GPU)
+
+If you wish to separately spin up a TGI server and connect it with Llama Stack, you may use the following commands.
+
+#### (optional) Start TGI server locally
+- Please check the [TGI Getting Started Guide](https://github.com/huggingface/text-generation-inference?tab=readme-ov-file#get-started) to get a TGI endpoint.
+
+```
+docker run --rm -it -v $HOME/.cache/huggingface:/data -p 5009:5009 --gpus all ghcr.io/huggingface/text-generation-inference:latest --dtype bfloat16 --usage-stats on --sharded false --model-id meta-llama/Llama-3.1-8B-Instruct --port 5009
+```
+
+
+#### Start Llama Stack server pointing to TGI server
+
+```
+docker run --network host -it -p 5000:5000 -v ./run.yaml:/root/my-run.yaml --gpus=all llamastack-local-cpu --yaml_config /root/my-run.yaml
+```
+
+Make sure that in your `run.yaml` file, your inference provider is pointing to the correct TGI server endpoint. E.g.
+``` +inference: + - provider_id: tgi0 + provider_type: remote::tgi + config: + url: http://127.0.0.1:5009 +``` + +**Via Conda** + +```bash +llama stack build --config ./build.yaml +# -- start a TGI server endpoint +llama stack run ./gpu/run.yaml +``` diff --git a/llama_stack/distribution/templates/build_configs/local-tgi-build.yaml b/distributions/tgi/build.yaml similarity index 51% rename from llama_stack/distribution/templates/build_configs/local-tgi-build.yaml rename to distributions/tgi/build.yaml index d4752539d..c3950e900 100644 --- a/llama_stack/distribution/templates/build_configs/local-tgi-build.yaml +++ b/distributions/tgi/build.yaml @@ -1,9 +1,12 @@ -name: local-tgi +name: distribution-tgi distribution_spec: - description: Like local, but use a TGI server for running LLM inference. + description: Use TGI for running LLM inference providers: inference: remote::tgi - memory: meta-reference + memory: + - meta-reference + - remote::chromadb + - remote::pgvector safety: meta-reference agents: meta-reference telemetry: meta-reference diff --git a/distributions/tgi/cpu/compose.yaml b/distributions/tgi/cpu/compose.yaml new file mode 100644 index 000000000..df7c74489 --- /dev/null +++ b/distributions/tgi/cpu/compose.yaml @@ -0,0 +1,54 @@ +services: + text-generation-inference: + image: ghcr.io/huggingface/text-generation-inference:latest + network_mode: "host" + volumes: + - $HOME/.cache/huggingface:/data + ports: + - "5009:5009" + devices: + - nvidia.com/gpu=all + environment: + - CUDA_VISIBLE_DEVICES=0 + - HF_HOME=/data + - HF_DATASETS_CACHE=/data + - HF_MODULES_CACHE=/data + - HF_HUB_CACHE=/data + command: ["--dtype", "bfloat16", "--usage-stats", "on", "--sharded", "false", "--model-id", "meta-llama/Llama-3.1-8B-Instruct", "--port", "5009", "--cuda-memory-fraction", "0.3"] + deploy: + resources: + reservations: + devices: + - driver: nvidia + # that's the closest analogue to --gpus; provide + # an integer amount of devices or 'all' + count: 1 + # Devices are reserved using a list of capabilities, making + # capabilities the only required field. A device MUST + # satisfy all the requested capabilities for a successful + # reservation. 
+ capabilities: [gpu] + runtime: nvidia + healthcheck: + test: ["CMD", "curl", "-f", "http://text-generation-inference:5009/health"] + interval: 5s + timeout: 5s + retries: 30 + llamastack: + depends_on: + text-generation-inference: + condition: service_healthy + image: llamastack/llamastack-local-cpu + network_mode: "host" + volumes: + - ~/.llama:/root/.llama + # Link to run.yaml file + - ./run.yaml:/root/my-run.yaml + ports: + - "5000:5000" + entrypoint: bash -c "python -m llama_stack.distribution.server.server --yaml_config /root/my-run.yaml" + restart_policy: + condition: on-failure + delay: 3s + max_attempts: 5 + window: 60s diff --git a/llama_stack/distribution/templates/run_configs/local-tgi-run.yaml b/distributions/tgi/cpu/run.yaml similarity index 83% rename from llama_stack/distribution/templates/run_configs/local-tgi-run.yaml rename to distributions/tgi/cpu/run.yaml index ec3af742c..bf46391b4 100644 --- a/llama_stack/distribution/templates/run_configs/local-tgi-run.yaml +++ b/distributions/tgi/cpu/run.yaml @@ -16,9 +16,9 @@ providers: - provider_id: tgi0 provider_type: remote::tgi config: - url: http://127.0.0.1:5009 + url: safety: - - provider_id: meta-reference + - provider_id: meta0 provider_type: meta-reference config: llama_guard_shield: @@ -29,11 +29,11 @@ providers: prompt_guard_shield: model: Prompt-Guard-86M memory: - - provider_id: meta-reference + - provider_id: meta0 provider_type: meta-reference config: {} agents: - - provider_id: meta-reference + - provider_id: meta0 provider_type: meta-reference config: persistence_store: @@ -41,6 +41,6 @@ providers: type: sqlite db_path: ~/.llama/runtime/kvstore.db telemetry: - - provider_id: meta-reference + - provider_id: meta0 provider_type: meta-reference config: {} diff --git a/llama_stack/distribution/docker/tgi/compose.yaml b/distributions/tgi/gpu/compose.yaml similarity index 90% rename from llama_stack/distribution/docker/tgi/compose.yaml rename to distributions/tgi/gpu/compose.yaml index d5bcd50f3..60dbe4938 100644 --- a/llama_stack/distribution/docker/tgi/compose.yaml +++ b/distributions/tgi/gpu/compose.yaml @@ -34,20 +34,20 @@ services: interval: 5s timeout: 5s retries: 30 - llamastack-local-cpu: + llamastack: depends_on: text-generation-inference: condition: service_healthy - image: llamastack-local-cpu + image: llamastack/llamastack-local-cpu network_mode: "host" volumes: - ~/.llama:/root/.llama # Link to TGI run.yaml file - - ./tgi-run.yaml:/root/llamastack-run-tgi.yaml + - ./run.yaml:/root/my-run.yaml ports: - "5000:5000" # Hack: wait for TGI server to start before starting docker - entrypoint: bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/llamastack-run-tgi.yaml" + entrypoint: bash -c "sleep 60; python -m llama_stack.distribution.server.server --yaml_config /root/my-run.yaml" restart_policy: condition: on-failure delay: 3s diff --git a/llama_stack/distribution/docker/tgi/tgi-run.yaml b/distributions/tgi/gpu/run.yaml similarity index 100% rename from llama_stack/distribution/docker/tgi/tgi-run.yaml rename to distributions/tgi/gpu/run.yaml diff --git a/llama_stack/distribution/templates/build_configs/local-together-build.yaml b/distributions/together/build.yaml similarity index 92% rename from llama_stack/distribution/templates/build_configs/local-together-build.yaml rename to distributions/together/build.yaml index ebf0bf1fb..67ba2eefa 100644 --- a/llama_stack/distribution/templates/build_configs/local-together-build.yaml +++ b/distributions/together/build.yaml @@ -1,4 
+1,4 @@
-name: local-together
+name: together
 distribution_spec:
   description: Use Together.ai for running LLM inference
   providers:
diff --git a/llama_stack/distribution/templates/build_configs/local-ollama-build.yaml b/distributions/vllm/build.yaml
similarity index 51%
rename from llama_stack/distribution/templates/build_configs/local-ollama-build.yaml
rename to distributions/vllm/build.yaml
index d9116b4b1..f41352eb1 100644
--- a/llama_stack/distribution/templates/build_configs/local-ollama-build.yaml
+++ b/distributions/vllm/build.yaml
@@ -1,10 +1,10 @@
-name: local-ollama
+name: vllm
 distribution_spec:
-  description: Like local, but use ollama for running LLM inference
+  description: Like local, but use vLLM for running LLM inference
   providers:
-    inference: remote::ollama
+    inference: vllm
     memory: meta-reference
     safety: meta-reference
     agents: meta-reference
     telemetry: meta-reference
-image_type: conda
+image_type: conda
\ No newline at end of file
diff --git a/docs/building_distro.md b/docs/building_distro.md
new file mode 100644
index 000000000..05e5c09bb
--- /dev/null
+++ b/docs/building_distro.md
@@ -0,0 +1,270 @@
+# Building a Llama Stack Distribution
+
+This guide will walk you through the steps to get started with building a Llama Stack distribution from scratch with your choice of API providers. Please see the [Getting Started Guide](./getting_started.md) if you just want the basic steps to start a Llama Stack distribution.
+
+## Step 1. Build
+In the following steps, imagine we'll be working with a `Meta-Llama3.1-8B-Instruct` model. We will name our build `8b-instruct` to help us remember the config. We will start building our distribution (in the form of a Conda environment, or Docker image). In this step, we will specify:
+- `name`: the name for our distribution (e.g. `8b-instruct`)
+- `image_type`: our build image type (`conda | docker`)
+- `distribution_spec`: our distribution specs for specifying API providers
+  - `description`: a short description of the configurations for the distribution
+  - `providers`: specifies the underlying implementation for serving each API endpoint
+  - `image_type`: `conda` | `docker` to specify whether to build the distribution in the form of Docker image or Conda environment.
+
+
+At the end of the build command, we will generate a `<name>-build.yaml` file storing the build configurations.
+
+After this step is complete, a file named `<name>-build.yaml` will be generated and saved at the output file path specified at the end of the command.
+
+#### Building from scratch
+- For a new user, we could start off by running `llama stack build`, which will launch an interactive wizard where you will be prompted to enter build configurations.
+```
+llama stack build
+```
+
+Running the command above will allow you to fill in the configuration to build your Llama Stack distribution; you will see the following outputs.
+
+```
+> Enter an unique name for identifying your Llama Stack build distribution (e.g. my-local-stack): 8b-instruct
+> Enter the image type you want your distribution to be built with (docker or conda): conda
+
+ Llama Stack is composed of several APIs working together. Let's configure the providers (implementations) you want to use for these APIs.
+> Enter the API provider for the inference API: (default=meta-reference): meta-reference +> Enter the API provider for the safety API: (default=meta-reference): meta-reference +> Enter the API provider for the agents API: (default=meta-reference): meta-reference +> Enter the API provider for the memory API: (default=meta-reference): meta-reference +> Enter the API provider for the telemetry API: (default=meta-reference): meta-reference + + > (Optional) Enter a short description for your Llama Stack distribution: + +Build spec configuration saved at ~/.conda/envs/llamastack-my-local-llama-stack/8b-instruct-build.yaml +``` + +**Ollama (optional)** + +If you plan to use Ollama for inference, you'll need to install the server [via these instructions](https://ollama.com/download). + + +#### Building from templates +- To build from alternative API providers, we provide distribution templates for users to get started building a distribution backed by different providers. + +The following command will allow you to see the available templates and their corresponding providers. +``` +llama stack build --list-templates +``` + +![alt text](resources/list-templates.png) + +You may then pick a template to build your distribution with providers fitted to your liking. + +``` +llama stack build --template local-tgi --name my-tgi-stack +``` + +``` +$ llama stack build --template local-tgi --name my-tgi-stack +... +... +Build spec configuration saved at ~/.conda/envs/llamastack-my-tgi-stack/my-tgi-stack-build.yaml +You may now run `llama stack configure my-tgi-stack` or `llama stack configure ~/.conda/envs/llamastack-my-tgi-stack/my-tgi-stack-build.yaml` +``` + +#### Building from config file +- In addition to templates, you may customize the build to your liking through editing config files and build from config files with the following command. + +- The config file will be of contents like the ones in `llama_stack/distributions/templates/`. + +``` +$ cat llama_stack/distribution/templates/local-ollama-build.yaml + +name: local-ollama +distribution_spec: + description: Like local, but use ollama for running LLM inference + providers: + inference: remote::ollama + memory: meta-reference + safety: meta-reference + agents: meta-reference + telemetry: meta-reference +image_type: conda +``` + +``` +llama stack build --config llama_stack/distribution/templates/local-ollama-build.yaml +``` + +#### How to build distribution with Docker image + +> [!TIP] +> Podman is supported as an alternative to Docker. Set `DOCKER_BINARY` to `podman` in your environment to use Podman. + +To build a docker image, you may start off from a template and use the `--image-type docker` flag to specify `docker` as the build image type. + +``` +llama stack build --template local --image-type docker --name docker-0 +``` + +Alternatively, you may use a config file and set `image_type` to `docker` in our `-build.yaml` file, and run `llama stack build -build.yaml`. 
The `-build.yaml` will be of contents like: + +``` +name: local-docker-example +distribution_spec: + description: Use code from `llama_stack` itself to serve all llama stack APIs + docker_image: null + providers: + inference: meta-reference + memory: meta-reference-faiss + safety: meta-reference + agentic_system: meta-reference + telemetry: console +image_type: docker +``` + +The following command allows you to build a Docker image with the name `` +``` +llama stack build --config -build.yaml + +Dockerfile created successfully in /tmp/tmp.I0ifS2c46A/DockerfileFROM python:3.10-slim +WORKDIR /app +... +... +You can run it with: podman run -p 8000:8000 llamastack-docker-local +Build spec configuration saved at ~/.llama/distributions/docker/docker-local-build.yaml +``` + + +## Step 2. Configure +After our distribution is built (either in form of docker or conda environment), we will run the following command to +``` +llama stack configure [ | | ] +``` +- For `conda` environments: would be the generated build spec saved from Step 1. +- For `docker` images downloaded from Dockerhub, you could also use as the argument. + - Run `docker images` to check list of available images on your machine. + +``` +$ llama stack configure 8b-instruct + +Configuring API: inference (meta-reference) +Enter value for model (existing: Meta-Llama3.1-8B-Instruct) (required): +Enter value for quantization (optional): +Enter value for torch_seed (optional): +Enter value for max_seq_len (existing: 4096) (required): +Enter value for max_batch_size (existing: 1) (required): + +Configuring API: memory (meta-reference-faiss) + +Configuring API: safety (meta-reference) +Do you want to configure llama_guard_shield? (y/n): y +Entering sub-configuration for llama_guard_shield: +Enter value for model (default: Llama-Guard-3-1B) (required): +Enter value for excluded_categories (default: []) (required): +Enter value for disable_input_check (default: False) (required): +Enter value for disable_output_check (default: False) (required): +Do you want to configure prompt_guard_shield? (y/n): y +Entering sub-configuration for prompt_guard_shield: +Enter value for model (default: Prompt-Guard-86M) (required): + +Configuring API: agentic_system (meta-reference) +Enter value for brave_search_api_key (optional): +Enter value for bing_search_api_key (optional): +Enter value for wolfram_api_key (optional): + +Configuring API: telemetry (console) + +YAML configuration has been written to ~/.llama/builds/conda/8b-instruct-run.yaml +``` + +After this step is successful, you should be able to find a run configuration spec in `~/.llama/builds/conda/8b-instruct-run.yaml` with the following contents. You may edit this file to change the settings. + +As you can see, we did basic configuration above and configured: +- inference to run on model `Meta-Llama3.1-8B-Instruct` (obtained from `llama model list`) +- Llama Guard safety shield with model `Llama-Guard-3-1B` +- Prompt Guard safety shield with model `Prompt-Guard-86M` + +For how these configurations are stored as yaml, checkout the file printed at the end of the configuration. + +Note that all configurations as well as models are stored in `~/.llama` + + +## Step 3. Run +Now, let's start the Llama Stack Distribution Server. You will need the YAML configuration file which was written out at the end by the `llama stack configure` step. 
+ +``` +llama stack run 8b-instruct +``` + +You should see the Llama Stack server start and print the APIs that it is supporting + +``` +$ llama stack run 8b-instruct + +> initializing model parallel with size 1 +> initializing ddp with size 1 +> initializing pipeline with size 1 +Loaded in 19.28 seconds +NCCL version 2.20.5+cuda12.4 +Finished model load YES READY +Serving POST /inference/batch_chat_completion +Serving POST /inference/batch_completion +Serving POST /inference/chat_completion +Serving POST /inference/completion +Serving POST /safety/run_shield +Serving POST /agentic_system/memory_bank/attach +Serving POST /agentic_system/create +Serving POST /agentic_system/session/create +Serving POST /agentic_system/turn/create +Serving POST /agentic_system/delete +Serving POST /agentic_system/session/delete +Serving POST /agentic_system/memory_bank/detach +Serving POST /agentic_system/session/get +Serving POST /agentic_system/step/get +Serving POST /agentic_system/turn/get +Listening on :::5000 +INFO: Started server process [453333] +INFO: Waiting for application startup. +INFO: Application startup complete. +INFO: Uvicorn running on http://[::]:5000 (Press CTRL+C to quit) +``` + +> [!NOTE] +> Configuration is in `~/.llama/builds/local/conda/8b-instruct-run.yaml`. Feel free to increase `max_seq_len`. + +> [!IMPORTANT] +> The "local" distribution inference server currently only supports CUDA. It will not work on Apple Silicon machines. + +> [!TIP] +> You might need to use the flag `--disable-ipv6` to Disable IPv6 support + +This server is running a Llama model locally. + +## Step 4. Test with Client +Once the server is setup, we can test it with a client to see the example outputs. +``` +cd /path/to/llama-stack +conda activate # any environment containing the llama-stack pip package will work + +python -m llama_stack.apis.inference.client localhost 5000 +``` + +This will run the chat completion client and query the distribution’s /inference/chat_completion API. + +Here is an example output: +``` +User>hello world, write me a 2 sentence poem about the moon +Assistant> Here's a 2-sentence poem about the moon: + +The moon glows softly in the midnight sky, +A beacon of wonder, as it passes by. +``` + +Similarly you can test safety (if you configured llama-guard and/or prompt-guard shields) by: + +``` +python -m llama_stack.apis.safety.client localhost 5000 +``` + + +Check out our client SDKs for connecting to Llama Stack server in your preferred language, you can choose from [python](https://github.com/meta-llama/llama-stack-client-python), [node](https://github.com/meta-llama/llama-stack-client-node), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) programming languages to quickly build your applications. + +You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo. diff --git a/docs/developer_cookbook.md b/docs/developer_cookbook.md new file mode 100644 index 000000000..eed1aca3d --- /dev/null +++ b/docs/developer_cookbook.md @@ -0,0 +1,41 @@ +# Llama Stack Developer Cookbook + +Based on your developer needs, below are references to guides to help you get started. + +### Hosted Llama Stack Endpoint +* Developer Need: I want to connect to a Llama Stack endpoint to build my applications. 
+* Effort: 1min +* Guide: + - Checkout our [DeepLearning course](https://www.deeplearning.ai/short-courses/introducing-multimodal-llama-3-2) on building with Llama Stack apps on pre-hosted Llama Stack endpoint. + + +### Local meta-reference Llama Stack Server +* Developer Need: I want to start a local Llama Stack server with my GPU using meta-reference implementations. +* Effort: 5min +* Guide: + - Please see our [Getting Started Guide](./getting_started.md) on starting up a meta-reference Llama Stack server. + +### Llama Stack Server with Remote Providers +* Developer need: I want a Llama Stack distribution with a remote provider. +* Effort: 10min +* Guide + - Please see our [Distributions Guide](../distributions/) on starting up distributions with remote providers. + + +### On-Device (iOS) Llama Stack +* Developer Need: I want to use Llama Stack on-Device +* Effort: 1.5hr +* Guide: + - Please see our [iOS Llama Stack SDK](../llama_stack/providers/impls/ios/inference) implementations + +### Assemble your own Llama Stack Distribution +* Developer Need: I want to assemble my own distribution with API providers to my likings +* Effort: 30min +* Guide + - Please see our [Building Distribution](./building_distro.md) guide for assembling your own Llama Stack distribution with your choice of API providers. + +### Adding a New API Provider +* Developer Need: I want to add a new API provider to Llama Stack. +* Effort: 3hr +* Guide + - Please see our [Adding a New API Provider](./new_api_provider.md) guide for adding a new API provider. diff --git a/docs/getting_started.md b/docs/getting_started.md index 3eebf8bbc..e3db908a7 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -23,8 +23,7 @@ $CONDA_PREFIX/bin/pip install -e . For what you can do with the Llama CLI, please refer to [CLI Reference](./cli_reference.md). -## Quick Starting Llama Stack Server - +## Starting Up Llama Stack Server #### Starting up server via docker We provide 2 pre-built Docker image of Llama Stack distribution, which can be found in the following links. @@ -50,7 +49,7 @@ docker run -it -p 5000:5000 -v ~/.llama:/root/.llama --gpus=all llamastack/llama ``` > [!TIP] -> Pro Tip: We may use `docker compose up` for starting up a distribution with remote providers (e.g. TGI) using [llamastack-local-cpu](https://hub.docker.com/repository/docker/llamastack/llamastack-local-cpu/general). You can checkout [these scripts](../llama_stack/distribution/docker/README.md) to help you get started. +> Pro Tip: We may use `docker compose up` for starting up a distribution with remote providers (e.g. TGI) using [llamastack-local-cpu](https://hub.docker.com/repository/docker/llamastack/llamastack-local-cpu/general). You can checkout [these scripts](../distributions/) to help you get started. #### Build->Configure->Run Llama Stack server via conda You may also build a LlamaStack distribution from scratch, configure it, and start running the distribution. This is useful for developing on LlamaStack. @@ -160,245 +159,8 @@ INFO: Application startup complete. INFO: Uvicorn running on http://[::]:5000 (Press CTRL+C to quit) ``` -## Building a Distribution -## Step 1. Build -In the following steps, imagine we'll be working with a `Meta-Llama3.1-8B-Instruct` model. We will name our build `8b-instruct` to help us remember the config. We will start build our distribution (in the form of a Conda environment, or Docker image). In this step, we will specify: -- `name`: the name for our distribution (e.g. 
`8b-instruct`) -- `image_type`: our build image type (`conda | docker`) -- `distribution_spec`: our distribution specs for specifying API providers - - `description`: a short description of the configurations for the distribution - - `providers`: specifies the underlying implementation for serving each API endpoint - - `image_type`: `conda` | `docker` to specify whether to build the distribution in the form of Docker image or Conda environment. - - -At the end of build command, we will generate `-build.yaml` file storing the build configurations. - -After this step is complete, a file named `-build.yaml` will be generated and saved at the output file path specified at the end of the command. - -#### Building from scratch -- For a new user, we could start off with running `llama stack build` which will allow you to a interactively enter wizard where you will be prompted to enter build configurations. -``` -llama stack build -``` - -Running the command above will allow you to fill in the configuration to build your Llama Stack distribution, you will see the following outputs. - -``` -> Enter an unique name for identifying your Llama Stack build distribution (e.g. my-local-stack): 8b-instruct -> Enter the image type you want your distribution to be built with (docker or conda): conda - - Llama Stack is composed of several APIs working together. Let's configure the providers (implementations) you want to use for these APIs. -> Enter the API provider for the inference API: (default=meta-reference): meta-reference -> Enter the API provider for the safety API: (default=meta-reference): meta-reference -> Enter the API provider for the agents API: (default=meta-reference): meta-reference -> Enter the API provider for the memory API: (default=meta-reference): meta-reference -> Enter the API provider for the telemetry API: (default=meta-reference): meta-reference - - > (Optional) Enter a short description for your Llama Stack distribution: - -Build spec configuration saved at ~/.conda/envs/llamastack-my-local-llama-stack/8b-instruct-build.yaml -``` - -**Ollama (optional)** - -If you plan to use Ollama for inference, you'll need to install the server [via these instructions](https://ollama.com/download). - - -#### Building from templates -- To build from alternative API providers, we provide distribution templates for users to get started building a distribution backed by different providers. - -The following command will allow you to see the available templates and their corresponding providers. -``` -llama stack build --list-templates -``` - -![alt text](resources/list-templates.png) - -You may then pick a template to build your distribution with providers fitted to your liking. - -``` -llama stack build --template local-tgi --name my-tgi-stack -``` - -``` -$ llama stack build --template local-tgi --name my-tgi-stack -... -... -Build spec configuration saved at ~/.conda/envs/llamastack-my-tgi-stack/my-tgi-stack-build.yaml -You may now run `llama stack configure my-tgi-stack` or `llama stack configure ~/.conda/envs/llamastack-my-tgi-stack/my-tgi-stack-build.yaml` -``` - -#### Building from config file -- In addition to templates, you may customize the build to your liking through editing config files and build from config files with the following command. - -- The config file will be of contents like the ones in `llama_stack/distributions/templates/`. 
- -``` -$ cat llama_stack/distribution/templates/local-ollama-build.yaml - -name: local-ollama -distribution_spec: - description: Like local, but use ollama for running LLM inference - providers: - inference: remote::ollama - memory: meta-reference - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: conda -``` - -``` -llama stack build --config llama_stack/distribution/templates/local-ollama-build.yaml -``` - -#### How to build distribution with Docker image - -> [!TIP] -> Podman is supported as an alternative to Docker. Set `DOCKER_BINARY` to `podman` in your environment to use Podman. - -To build a docker image, you may start off from a template and use the `--image-type docker` flag to specify `docker` as the build image type. - -``` -llama stack build --template local --image-type docker --name docker-0 -``` - -Alternatively, you may use a config file and set `image_type` to `docker` in our `-build.yaml` file, and run `llama stack build -build.yaml`. The `-build.yaml` will be of contents like: - -``` -name: local-docker-example -distribution_spec: - description: Use code from `llama_stack` itself to serve all llama stack APIs - docker_image: null - providers: - inference: meta-reference - memory: meta-reference-faiss - safety: meta-reference - agentic_system: meta-reference - telemetry: console -image_type: docker -``` - -The following command allows you to build a Docker image with the name `` -``` -llama stack build --config -build.yaml - -Dockerfile created successfully in /tmp/tmp.I0ifS2c46A/DockerfileFROM python:3.10-slim -WORKDIR /app -... -... -You can run it with: podman run -p 8000:8000 llamastack-docker-local -Build spec configuration saved at ~/.llama/distributions/docker/docker-local-build.yaml -``` - - -## Step 2. Configure -After our distribution is built (either in form of docker or conda environment), we will run the following command to -``` -llama stack configure [ | | ] -``` -- For `conda` environments: would be the generated build spec saved from Step 1. -- For `docker` images downloaded from Dockerhub, you could also use as the argument. - - Run `docker images` to check list of available images on your machine. - -``` -$ llama stack configure 8b-instruct - -Configuring API: inference (meta-reference) -Enter value for model (existing: Meta-Llama3.1-8B-Instruct) (required): -Enter value for quantization (optional): -Enter value for torch_seed (optional): -Enter value for max_seq_len (existing: 4096) (required): -Enter value for max_batch_size (existing: 1) (required): - -Configuring API: memory (meta-reference-faiss) - -Configuring API: safety (meta-reference) -Do you want to configure llama_guard_shield? (y/n): y -Entering sub-configuration for llama_guard_shield: -Enter value for model (default: Llama-Guard-3-1B) (required): -Enter value for excluded_categories (default: []) (required): -Enter value for disable_input_check (default: False) (required): -Enter value for disable_output_check (default: False) (required): -Do you want to configure prompt_guard_shield? 
(y/n): y -Entering sub-configuration for prompt_guard_shield: -Enter value for model (default: Prompt-Guard-86M) (required): - -Configuring API: agentic_system (meta-reference) -Enter value for brave_search_api_key (optional): -Enter value for bing_search_api_key (optional): -Enter value for wolfram_api_key (optional): - -Configuring API: telemetry (console) - -YAML configuration has been written to ~/.llama/builds/conda/8b-instruct-run.yaml -``` - -After this step is successful, you should be able to find a run configuration spec in `~/.llama/builds/conda/8b-instruct-run.yaml` with the following contents. You may edit this file to change the settings. - -As you can see, we did basic configuration above and configured: -- inference to run on model `Meta-Llama3.1-8B-Instruct` (obtained from `llama model list`) -- Llama Guard safety shield with model `Llama-Guard-3-1B` -- Prompt Guard safety shield with model `Prompt-Guard-86M` - -For how these configurations are stored as yaml, checkout the file printed at the end of the configuration. - -Note that all configurations as well as models are stored in `~/.llama` - - -## Step 3. Run -Now, let's start the Llama Stack Distribution Server. You will need the YAML configuration file which was written out at the end by the `llama stack configure` step. - -``` -llama stack run 8b-instruct -``` - -You should see the Llama Stack server start and print the APIs that it is supporting - -``` -$ llama stack run 8b-instruct - -> initializing model parallel with size 1 -> initializing ddp with size 1 -> initializing pipeline with size 1 -Loaded in 19.28 seconds -NCCL version 2.20.5+cuda12.4 -Finished model load YES READY -Serving POST /inference/batch_chat_completion -Serving POST /inference/batch_completion -Serving POST /inference/chat_completion -Serving POST /inference/completion -Serving POST /safety/run_shield -Serving POST /agentic_system/memory_bank/attach -Serving POST /agentic_system/create -Serving POST /agentic_system/session/create -Serving POST /agentic_system/turn/create -Serving POST /agentic_system/delete -Serving POST /agentic_system/session/delete -Serving POST /agentic_system/memory_bank/detach -Serving POST /agentic_system/session/get -Serving POST /agentic_system/step/get -Serving POST /agentic_system/turn/get -Listening on :::5000 -INFO: Started server process [453333] -INFO: Waiting for application startup. -INFO: Application startup complete. -INFO: Uvicorn running on http://[::]:5000 (Press CTRL+C to quit) -``` - -> [!NOTE] -> Configuration is in `~/.llama/builds/local/conda/8b-instruct-run.yaml`. Feel free to increase `max_seq_len`. - -> [!IMPORTANT] -> The "local" distribution inference server currently only supports CUDA. It will not work on Apple Silicon machines. - -> [!TIP] -> You might need to use the flag `--disable-ipv6` to Disable IPv6 support - -This server is running a Llama model locally. - -## Step 4. Test with Client +## Testing with client Once the server is setup, we can test it with a client to see the example outputs. 
``` cd /path/to/llama-stack @@ -428,3 +190,7 @@ python -m llama_stack.apis.safety.client localhost 5000 Check out our client SDKs for connecting to Llama Stack server in your preferred language, you can choose from [python](https://github.com/meta-llama/llama-stack-client-python), [node](https://github.com/meta-llama/llama-stack-client-node), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) programming languages to quickly build your applications. You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo. + + +## Advanced Guides +Please see our [Building a LLama Stack Distribution](./building_distro.md) guide for more details on how to assemble your own Llama Stack Distribution. diff --git a/docs/new_api_provider.md b/docs/new_api_provider.md new file mode 100644 index 000000000..bfef3a6b3 --- /dev/null +++ b/docs/new_api_provider.md @@ -0,0 +1,20 @@ +# Developer Guide: Adding a New API Provider + +This guide contains references to walk you through + +### Adding a new API provider +1. First, decide which API your provider falls into (e.g. Inference, Safety, Agents, Memory). +2. Decide whether your provider is a remote provider, or inline implmentation. A remote provider is a provider that makes a remote request to an service. An inline provider is a provider where implementation is executed locally. Checkout the examples, and follow the structure to add your own API provider. Please find the following code pointers: + - [Inference Remote Adapter](../llama_stack/providers/adapters/inference/) + - [Inference Inline Provider](../llama_stack/providers/impls/) +3. [Build a Llama Stack distribution](./building_distro.md) with your API provider. +4. Test your code! + +### Testing your newly added API providers +1. Start Llama Stack server with your +2. Test with sending a client request to the server. +3. Add tests for your newly added provider. See [tests/](../tests/) for example unit tests. +4. Test the supported functionalities for your provider using our providers tests infra. See [llama_stack/providers/tests//test_](../llama_stack/providers/tests/inference/test_inference.py). + +### Submit your PR +After you have fully tested your newly added API provider, submit a PR with the attached test plan, and we will help you verify the necessary requirements. diff --git a/llama_stack/cli/stack/build.py b/llama_stack/cli/stack/build.py index 3c59e8c20..26aa35e16 100644 --- a/llama_stack/cli/stack/build.py +++ b/llama_stack/cli/stack/build.py @@ -13,7 +13,7 @@ from functools import lru_cache from pathlib import Path TEMPLATES_PATH = ( - Path(os.path.relpath(__file__)).parent.parent.parent / "distribution" / "templates" + Path(os.path.relpath(__file__)).parent.parent.parent.parent / "distributions" ) diff --git a/llama_stack/distribution/docker/README.md b/llama_stack/distribution/docker/README.md deleted file mode 100644 index 962a07def..000000000 --- a/llama_stack/distribution/docker/README.md +++ /dev/null @@ -1,28 +0,0 @@ -# Docker Compose Scripts - -This folder contains scripts to enable starting a distribution using `docker compose`. 
- - -#### Example: TGI Inference Adapter -``` -$ cd llama_stack/distribution/docker/tgi -$ ls -compose.yaml tgi-run.yaml -$ docker compose up -``` - -The script will first start up TGI server, then start up Llama Stack distribution server hooking up to the remote TGI provider for inference. You should be able to see the following outputs -- -``` -[text-generation-inference] | 2024-10-15T18:56:33.810397Z INFO text_generation_router::server: router/src/server.rs:1813: Using config Some(Llama) -[text-generation-inference] | 2024-10-15T18:56:33.810448Z WARN text_generation_router::server: router/src/server.rs:1960: Invalid hostname, defaulting to 0.0.0.0 -[text-generation-inference] | 2024-10-15T18:56:33.864143Z INFO text_generation_router::server: router/src/server.rs:2353: Connected -INFO: Started server process [1] -INFO: Waiting for application startup. -INFO: Application startup complete. -INFO: Uvicorn running on http://[::]:5000 (Press CTRL+C to quit) -``` - -To kill the server -``` -docker compose down -``` diff --git a/llama_stack/distribution/templates/build_configs/local-cpu-docker-build.yaml b/llama_stack/distribution/templates/build_configs/local-cpu-docker-build.yaml deleted file mode 100644 index 9db019454..000000000 --- a/llama_stack/distribution/templates/build_configs/local-cpu-docker-build.yaml +++ /dev/null @@ -1,15 +0,0 @@ -name: local-cpu -distribution_spec: - description: remote inference + local safety/agents/memory - docker_image: null - providers: - inference: - - remote::ollama - - remote::tgi - - remote::together - - remote::fireworks - safety: meta-reference - agents: meta-reference - memory: meta-reference - telemetry: meta-reference -image_type: docker diff --git a/llama_stack/distribution/templates/build_configs/local-tgi-chroma-docker-build.yaml b/llama_stack/distribution/templates/build_configs/local-tgi-chroma-docker-build.yaml deleted file mode 100644 index 30715c551..000000000 --- a/llama_stack/distribution/templates/build_configs/local-tgi-chroma-docker-build.yaml +++ /dev/null @@ -1,11 +0,0 @@ -name: local-tgi-chroma -distribution_spec: - description: remote tgi inference + chromadb memory - docker_image: null - providers: - inference: remote::tgi - safety: meta-reference - agents: meta-reference - memory: remote::chromadb - telemetry: meta-reference -image_type: docker diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index c3370bfd9..c54cf5939 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -60,15 +60,15 @@ def available_providers() -> List[ProviderSpec]: module="llama_stack.providers.adapters.inference.ollama", ), ), -# remote_provider_spec( -# api=Api.inference, -# adapter=AdapterSpec( -# adapter_type="vllm", -# pip_packages=["openai"], -# module="llama_stack.providers.adapters.inference.vllm", -# config_class="llama_stack.providers.adapters.inference.vllm.VLLMImplConfig", -# ), -# ), + # remote_provider_spec( + # api=Api.inference, + # adapter=AdapterSpec( + # adapter_type="vllm", + # pip_packages=["openai"], + # module="llama_stack.providers.adapters.inference.vllm", + # config_class="llama_stack.providers.adapters.inference.vllm.VLLMImplConfig", + # ), + # ), remote_provider_spec( api=Api.inference, adapter=AdapterSpec( From af756183488bbdde5e396e562d76460a91960331 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Mon, 21 Oct 2024 13:23:58 -0700 Subject: [PATCH 27/40] remove distribution/templates --- 
.../templates/build_configs/local-vllm-build.yaml | 10 ---------- .../distribution/templates/remote-vllm-build.yaml | 10 ---------- 2 files changed, 20 deletions(-) delete mode 100644 llama_stack/distribution/templates/build_configs/local-vllm-build.yaml delete mode 100644 llama_stack/distribution/templates/remote-vllm-build.yaml diff --git a/llama_stack/distribution/templates/build_configs/local-vllm-build.yaml b/llama_stack/distribution/templates/build_configs/local-vllm-build.yaml deleted file mode 100644 index e333a137b..000000000 --- a/llama_stack/distribution/templates/build_configs/local-vllm-build.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: local-vllm -distribution_spec: - description: Like local, but use vLLM for running LLM inference - providers: - inference: vllm - memory: meta-reference - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: conda \ No newline at end of file diff --git a/llama_stack/distribution/templates/remote-vllm-build.yaml b/llama_stack/distribution/templates/remote-vllm-build.yaml deleted file mode 100644 index 525c3a930..000000000 --- a/llama_stack/distribution/templates/remote-vllm-build.yaml +++ /dev/null @@ -1,10 +0,0 @@ -name: remote-vllm -distribution_spec: - description: Use remote vLLM for running LLM inference - providers: - inference: remote::vllm - memory: meta-reference - safety: meta-reference - agents: meta-reference - telemetry: meta-reference -image_type: docker \ No newline at end of file From 25b37c9ff789a53823c97e72f9e0f6acd6c7186c Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Mon, 21 Oct 2024 13:41:46 -0700 Subject: [PATCH 28/40] Update new_api_provider.md --- docs/new_api_provider.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/new_api_provider.md b/docs/new_api_provider.md index bfef3a6b3..7f866ec7b 100644 --- a/docs/new_api_provider.md +++ b/docs/new_api_provider.md @@ -11,10 +11,10 @@ This guide contains references to walk you through 4. Test your code! ### Testing your newly added API providers -1. Start Llama Stack server with your -2. Test with sending a client request to the server. +1. Start Llama Stack server with your distribution including your API provider. +2. Test with sending a client request to the server. You may find more complex client scripts [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main) repo. Note down which scripts works and do not work with your distribution. 3. Add tests for your newly added provider. See [tests/](../tests/) for example unit tests. 4. Test the supported functionalities for your provider using our providers tests infra. See [llama_stack/providers/tests//test_](../llama_stack/providers/tests/inference/test_inference.py). ### Submit your PR -After you have fully tested your newly added API provider, submit a PR with the attached test plan, and we will help you verify the necessary requirements. +After you have fully tested your newly added API provider, submit a PR with the attached test plan. 
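The patches above repeatedly tell provider authors to "send a client request to the server" once a distribution with the new provider is running. As a rough sketch only (the request body below is an assumption; the exact schema comes from the Inference API, and the bundled `llama_stack.apis.*.client` modules shown earlier are the authoritative clients), a smoke test against the `/inference/chat_completion` route that the server prints at startup might look like:

```python
# Hypothetical provider smoke test against a locally running Llama Stack server.
# Field names in the payload are assumptions for illustration; consult
# llama_stack/apis/inference/client.py for the real request schema.
import httpx

BASE_URL = "http://localhost:5000"  # port used throughout these docs


def smoke_test_chat_completion() -> None:
    payload = {
        "model": "Meta-Llama3.1-8B-Instruct",  # any model your distribution serves
        "messages": [{"role": "user", "content": "Say hello."}],
        "stream": False,
    }
    # /inference/chat_completion is one of the routes listed at server startup.
    resp = httpx.post(f"{BASE_URL}/inference/chat_completion", json=payload, timeout=60)
    resp.raise_for_status()
    print(resp.json())


if __name__ == "__main__":
    smoke_test_chat_completion()
```

Recording whether a request like this succeeds or fails against your distribution is exactly the kind of observation that belongs in the test plan mentioned above.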
From 3a7884345adf1c6969f56f5f57b3b2d436ab3627 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Mon, 21 Oct 2024 13:41:56 -0700 Subject: [PATCH 29/40] Update new_api_provider.md --- docs/new_api_provider.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/new_api_provider.md b/docs/new_api_provider.md index 7f866ec7b..d023b65cd 100644 --- a/docs/new_api_provider.md +++ b/docs/new_api_provider.md @@ -1,6 +1,6 @@ # Developer Guide: Adding a New API Provider -This guide contains references to walk you through +This guide contains references to walk you through adding a new API provider. ### Adding a new API provider 1. First, decide which API your provider falls into (e.g. Inference, Safety, Agents, Memory). From cb203b14b48bd88e7b581ccfd90b0b968326e004 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Mon, 21 Oct 2024 13:51:39 -0700 Subject: [PATCH 30/40] update README.md --- README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index fef556a73..973a9a396 100644 --- a/README.md +++ b/README.md @@ -90,9 +90,16 @@ The `llama` CLI makes it easy to work with the Llama Stack set of tools. Please * [CLI reference](docs/cli_reference.md) * Guide using `llama` CLI to work with Llama models (download, study prompts), and building/starting a Llama Stack distribution. * [Getting Started](docs/getting_started.md) - * Guide to start a Llama Stack server. + * Quick guide to start a Llama Stack server. * [Jupyter notebook](./docs/getting_started.ipynb) to walk-through how to use simple text and vision inference llama_stack_client APIs +* [Building a Llama Stack Distribution](docs/building_distro.md) + * Guide to build a Llama Stack distribution +* [Distributions](./distributions/) + * References to start Llama Stack distributions backed with different API providers. +* [Developer Cookbook](./docs/developer_cookbook.md) + * References to guides to help you get started based on your developer needs. * [Contributing](CONTRIBUTING.md) + * [Adding a new API Provider](./docs/new_api_provider.md) to walk-through how to add a new API provider. ## Llama Stack Client SDK From 606c48309e0985316b205a6925654fb6732afcf5 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 21 Oct 2024 13:52:10 -0700 Subject: [PATCH 31/40] Small updates to encourage integration testing --- docs/new_api_provider.md | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/docs/new_api_provider.md b/docs/new_api_provider.md index d023b65cd..0802c247a 100644 --- a/docs/new_api_provider.md +++ b/docs/new_api_provider.md @@ -1,6 +1,6 @@ # Developer Guide: Adding a New API Provider -This guide contains references to walk you through adding a new API provider. +This guide contains references to walk you through adding a new API provider. ### Adding a new API provider 1. First, decide which API your provider falls into (e.g. Inference, Safety, Agents, Memory). @@ -11,10 +11,14 @@ This guide contains references to walk you through adding a new API provider. 4. Test your code! ### Testing your newly added API providers -1. Start Llama Stack server with your distribution including your API provider. -2. Test with sending a client request to the server. You may find more complex client scripts [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main) repo. Note down which scripts works and do not work with your distribution. -3. Add tests for your newly added provider. See [tests/](../tests/) for example unit tests. -4. 
Test the supported functionalities for your provider using our providers tests infra. See [llama_stack/providers/tests//test_](../llama_stack/providers/tests/inference/test_inference.py). + +1. Start with an _integration test_ for your provider. That means we will instantiate the real provider, pass it real configuration and if it is a remote service, we will actually hit the remote service. We **strongly** discourage mocking for these tests at the provider level. Llama Stack is first and foremost about integration so we need to make sure stuff works end-to-end. See [llama_stack/providers/tests//test_](../llama_stack/providers/tests/inference/test_inference.py) for an example. + +2. In addition, if you want to unit test functionality within your provider, feel free to do so. You can find some tests in `tests/` but they aren't well supported so far. + +3. Test with a client-server Llama Stack setup. (a) Start a Llama Stack server with your own distribution which includes the new provider. (b) Send a client request to the server. See `llama_stack/apis//client.py` for how this is done. These client scripts can serve as lightweight tests. + +You can find more complex client scripts [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main) repo. Note down which scripts works and do not work with your distribution. ### Submit your PR -After you have fully tested your newly added API provider, submit a PR with the attached test plan. +After you have fully tested your newly added API provider, submit a PR with the attached test plan. You must have a Test Plan in the summary section of your PR. From 1944405dcaad4e61561b7259f1a5a546de93dd52 Mon Sep 17 00:00:00 2001 From: Ashwin Bharambe Date: Mon, 21 Oct 2024 14:02:51 -0700 Subject: [PATCH 32/40] Update new_api_provider.md --- docs/new_api_provider.md | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/new_api_provider.md b/docs/new_api_provider.md index 0802c247a..ff0bef959 100644 --- a/docs/new_api_provider.md +++ b/docs/new_api_provider.md @@ -5,14 +5,16 @@ This guide contains references to walk you through adding a new API provider. ### Adding a new API provider 1. First, decide which API your provider falls into (e.g. Inference, Safety, Agents, Memory). 2. Decide whether your provider is a remote provider, or inline implmentation. A remote provider is a provider that makes a remote request to an service. An inline provider is a provider where implementation is executed locally. Checkout the examples, and follow the structure to add your own API provider. Please find the following code pointers: - - [Inference Remote Adapter](../llama_stack/providers/adapters/inference/) - - [Inference Inline Provider](../llama_stack/providers/impls/) + + - [Inference Remote Adapter](../llama_stack/providers/adapters/inference/) + - [Inference Inline Provider](../llama_stack/providers/impls/) + 3. [Build a Llama Stack distribution](./building_distro.md) with your API provider. 4. Test your code! ### Testing your newly added API providers -1. Start with an _integration test_ for your provider. That means we will instantiate the real provider, pass it real configuration and if it is a remote service, we will actually hit the remote service. We **strongly** discourage mocking for these tests at the provider level. Llama Stack is first and foremost about integration so we need to make sure stuff works end-to-end. 
See [llama_stack/providers/tests//test_](../llama_stack/providers/tests/inference/test_inference.py) for an example. +1. Start with an _integration test_ for your provider. That means we will instantiate the real provider, pass it real configuration and if it is a remote service, we will actually hit the remote service. We **strongly** discourage mocking for these tests at the provider level. Llama Stack is first and foremost about integration so we need to make sure stuff works end-to-end. See [llama_stack/providers/tests/inference/test_inference.py](../llama_stack/providers/tests/inference/test_inference.py) for an example. 2. In addition, if you want to unit test functionality within your provider, feel free to do so. You can find some tests in `tests/` but they aren't well supported so far. From cf27d19dd550d10033f591d5100ac1d379b5688b Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Mon, 21 Oct 2024 14:03:32 -0700 Subject: [PATCH 33/40] fix sse_generator async --- llama_stack/distribution/server/server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/distribution/server/server.py b/llama_stack/distribution/server/server.py index 6154432b6..84c369364 100644 --- a/llama_stack/distribution/server/server.py +++ b/llama_stack/distribution/server/server.py @@ -203,7 +203,7 @@ async def maybe_await(value): async def sse_generator(event_gen): try: - async for item in event_gen: + async for item in await event_gen: yield create_sse_event(item) await asyncio.sleep(0.01) except asyncio.CancelledError: From 4d2bd2d39ed8bf85bb20ed5af52090d300ecb5e0 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Mon, 21 Oct 2024 18:15:08 -0700 Subject: [PATCH 34/40] add more distro templates (#279) * verify dockers * together distro verified * readme * fireworks distro * fireworks compose up * fireworks verified --- distributions/README.md | 2 + distributions/fireworks/README.md | 55 +++++++++++++++++ distributions/fireworks/build.yaml | 2 +- distributions/fireworks/compose.yaml | 18 ++++++ distributions/fireworks/run.yaml | 46 ++++++++++++++ distributions/meta-reference-gpu/README.md | 11 +--- distributions/meta-reference-gpu/build.yaml | 2 +- distributions/ollama/README.md | 4 +- distributions/ollama/build.yaml | 4 +- distributions/ollama/gpu/compose.yaml | 2 +- distributions/tgi/build.yaml | 4 +- distributions/tgi/cpu/compose.yaml | 21 ------- distributions/together/README.md | 68 +++++++++++++++++++++ distributions/together/build.yaml | 4 +- distributions/together/compose.yaml | 18 ++++++ distributions/together/run.yaml | 42 +++++++++++++ llama_stack/distribution/build_container.sh | 2 +- llama_stack/providers/registry/inference.py | 2 +- 18 files changed, 265 insertions(+), 42 deletions(-) create mode 100644 distributions/fireworks/README.md create mode 100644 distributions/fireworks/compose.yaml create mode 100644 distributions/fireworks/run.yaml create mode 100644 distributions/together/README.md create mode 100644 distributions/together/compose.yaml create mode 100644 distributions/together/run.yaml diff --git a/distributions/README.md b/distributions/README.md index 92640210b..1802f0c9d 100644 --- a/distributions/README.md +++ b/distributions/README.md @@ -9,3 +9,5 @@ A Distribution is where APIs and Providers are assembled together to provide a c | Meta Reference | llamastack/distribution-meta-reference-gpu | [Guide](./meta-reference-gpu/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | Ollama | 
llamastack/distribution-ollama | [Guide](./ollama/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | TGI | llamastack/distribution-tgi | [Guide](./tgi/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| Together | llamastack/distribution-together | [Guide](./together/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| Fireworks | llamastack/distribution-fireworks | [Guide](./fireworks/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | diff --git a/distributions/fireworks/README.md b/distributions/fireworks/README.md new file mode 100644 index 000000000..fcf74d809 --- /dev/null +++ b/distributions/fireworks/README.md @@ -0,0 +1,55 @@ +# Fireworks Distribution + +The `llamastack/distribution-` distribution consists of the following provider configurations. + + +| **API** | **Inference** | **Agents** | **Memory** | **Safety** | **Telemetry** | +|----------------- |--------------- |---------------- |-------------------------------------------------- |---------------- |---------------- | +| **Provider(s)** | remote::fireworks | meta-reference | meta-reference | meta-reference | meta-reference | + + +### Start the Distribution (Single Node CPU) + +> [!NOTE] +> This assumes you have an hosted endpoint at Fireworks with API Key. + +``` +$ cd llama-stack/distribution/fireworks +$ ls +compose.yaml run.yaml +$ docker compose up +``` + +Make sure in you `run.yaml` file, you inference provider is pointing to the correct Fireworks URL server endpoint. E.g. +``` +inference: + - provider_id: fireworks + provider_type: remote::fireworks + config: + url: https://api.fireworks.ai/inferenc + api_key: +``` + +### (Alternative) TGI server + llama stack run (Single Node GPU) + +``` +docker run --network host -it -p 5000:5000 -v ./run.yaml:/root/my-run.yaml --gpus=all llamastack/distribution-fireworks --yaml_config /root/my-run.yaml +``` + +Make sure in you `run.yaml` file, you inference provider is pointing to the correct Fireworks URL server endpoint. E.g. 
+``` +inference: + - provider_id: fireworks + provider_type: remote::fireworks + config: + url: https://api.fireworks.ai/inference + api_key: +``` + +**Via Conda** + +```bash +llama stack build --config ./build.yaml +# -- modify run.yaml to a valid Fireworks server endpoint +llama stack run ./run.yaml +``` diff --git a/distributions/fireworks/build.yaml b/distributions/fireworks/build.yaml index 831643ff1..2e5cf0753 100644 --- a/distributions/fireworks/build.yaml +++ b/distributions/fireworks/build.yaml @@ -7,4 +7,4 @@ distribution_spec: safety: meta-reference agents: meta-reference telemetry: meta-reference -image_type: conda +image_type: docker diff --git a/distributions/fireworks/compose.yaml b/distributions/fireworks/compose.yaml new file mode 100644 index 000000000..552806745 --- /dev/null +++ b/distributions/fireworks/compose.yaml @@ -0,0 +1,18 @@ +services: + llamastack: + image: llamastack/distribution-fireworks + network_mode: "host" + volumes: + - ~/.llama:/root/.llama + # Link to ollama run.yaml file + - ./run.yaml:/root/llamastack-run-fireworks.yaml + ports: + - "5000:5000" + # Hack: wait for ollama server to start before starting docker + entrypoint: bash -c "python -m llama_stack.distribution.server.server --yaml_config /root/llamastack-run-fireworks.yaml" + deploy: + restart_policy: + condition: on-failure + delay: 3s + max_attempts: 5 + window: 60s diff --git a/distributions/fireworks/run.yaml b/distributions/fireworks/run.yaml new file mode 100644 index 000000000..c48b0cb7b --- /dev/null +++ b/distributions/fireworks/run.yaml @@ -0,0 +1,46 @@ +version: '2' +built_at: '2024-10-08T17:40:45.325529' +image_name: local +docker_image: null +conda_env: local +apis: +- shields +- agents +- models +- memory +- memory_banks +- inference +- safety +providers: + inference: + - provider_id: fireworks0 + provider_type: remote::fireworks + config: + url: https://api.fireworks.ai/inference + safety: + - provider_id: meta0 + provider_type: meta-reference + config: + llama_guard_shield: + model: Llama-Guard-3-1B + excluded_categories: [] + disable_input_check: false + disable_output_check: false + prompt_guard_shield: + model: Prompt-Guard-86M + memory: + - provider_id: meta0 + provider_type: meta-reference + config: {} + agents: + - provider_id: meta0 + provider_type: meta-reference + config: + persistence_store: + namespace: null + type: sqlite + db_path: ~/.llama/runtime/kvstore.db + telemetry: + - provider_id: meta0 + provider_type: meta-reference + config: {} diff --git a/distributions/meta-reference-gpu/README.md b/distributions/meta-reference-gpu/README.md index 951120da5..7f209c4a9 100644 --- a/distributions/meta-reference-gpu/README.md +++ b/distributions/meta-reference-gpu/README.md @@ -11,13 +11,8 @@ The `llamastack/distribution-meta-reference-gpu` distribution consists of the fo ### Start the Distribution (Single Node GPU) > [!NOTE] -> This assumes you have access to GPU to start a TGI server with access to your GPU. +> This assumes you have access to GPU to start a local server with access to your GPU. -> [!NOTE] -> For GPU inference, you need to set these environment variables for specifying local directory containing your model checkpoints, and enable GPU inference to start running docker container. -``` -export LLAMA_CHECKPOINT_DIR=~/.llama -``` > [!NOTE] > `~/.llama` should be the path containing downloaded weights of Llama models. 
@@ -26,8 +21,8 @@ export LLAMA_CHECKPOINT_DIR=~/.llama To download and start running a pre-built docker container, you may use the following commands: ``` -docker run -it -p 5000:5000 -v ~/.llama:/root/.llama --gpus=all llamastack/llamastack-local-gpu +docker run -it -p 5000:5000 -v ~/.llama:/root/.llama -v ./run.yaml:/root/my-run.yaml --gpus=all distribution-meta-reference-gpu --yaml_config /root/my-run.yaml ``` ### Alternative (Build and start distribution locally via conda) -- You may checkout the [Getting Started](../../docs/getting_started.md) for more details on starting up a meta-reference distribution. +- You may checkout the [Getting Started](../../docs/getting_started.md) for more details on building locally via conda and starting up a meta-reference distribution. diff --git a/distributions/meta-reference-gpu/build.yaml b/distributions/meta-reference-gpu/build.yaml index ca786c51c..e76197330 100644 --- a/distributions/meta-reference-gpu/build.yaml +++ b/distributions/meta-reference-gpu/build.yaml @@ -1,4 +1,4 @@ -name: distribution-meta-reference-gpu +name: meta-reference-gpu distribution_spec: description: Use code from `llama_stack` itself to serve all llama stack APIs providers: diff --git a/distributions/ollama/README.md b/distributions/ollama/README.md index 43c764cbe..d59c3f9e1 100644 --- a/distributions/ollama/README.md +++ b/distributions/ollama/README.md @@ -71,10 +71,10 @@ ollama run **Via Docker** ``` -docker run --network host -it -p 5000:5000 -v ~/.llama:/root/.llama -v ./ollama-run.yaml:/root/llamastack-run-ollama.yaml --gpus=all llamastack-local-cpu --yaml_config /root/llamastack-run-ollama.yaml +docker run --network host -it -p 5000:5000 -v ~/.llama:/root/.llama -v ./gpu/run.yaml:/root/llamastack-run-ollama.yaml --gpus=all distribution-ollama --yaml_config /root/llamastack-run-ollama.yaml ``` -Make sure in you `ollama-run.yaml` file, you inference provider is pointing to the correct Ollama endpoint. E.g. +Make sure in you `run.yaml` file, you inference provider is pointing to the correct Ollama endpoint. E.g. 
``` inference: - provider_id: ollama0 diff --git a/distributions/ollama/build.yaml b/distributions/ollama/build.yaml index d14091814..c27f40929 100644 --- a/distributions/ollama/build.yaml +++ b/distributions/ollama/build.yaml @@ -1,4 +1,4 @@ -name: distribution-ollama +name: ollama distribution_spec: description: Use ollama for running LLM inference providers: @@ -10,4 +10,4 @@ distribution_spec: safety: meta-reference agents: meta-reference telemetry: meta-reference -image_type: conda +image_type: docker diff --git a/distributions/ollama/gpu/compose.yaml b/distributions/ollama/gpu/compose.yaml index 2e3f85e45..7f9663a8d 100644 --- a/distributions/ollama/gpu/compose.yaml +++ b/distributions/ollama/gpu/compose.yaml @@ -33,7 +33,7 @@ services: volumes: - ~/.llama:/root/.llama # Link to ollama run.yaml file - - ./ollama-run.yaml:/root/llamastack-run-ollama.yaml + - ./run.yaml:/root/llamastack-run-ollama.yaml ports: - "5000:5000" # Hack: wait for ollama server to start before starting docker diff --git a/distributions/tgi/build.yaml b/distributions/tgi/build.yaml index c3950e900..2c0ca1d33 100644 --- a/distributions/tgi/build.yaml +++ b/distributions/tgi/build.yaml @@ -1,4 +1,4 @@ -name: distribution-tgi +name: tgi distribution_spec: description: Use TGI for running LLM inference providers: @@ -10,4 +10,4 @@ distribution_spec: safety: meta-reference agents: meta-reference telemetry: meta-reference -image_type: conda +image_type: docker diff --git a/distributions/tgi/cpu/compose.yaml b/distributions/tgi/cpu/compose.yaml index df7c74489..2ec10b86c 100644 --- a/distributions/tgi/cpu/compose.yaml +++ b/distributions/tgi/cpu/compose.yaml @@ -6,28 +6,7 @@ services: - $HOME/.cache/huggingface:/data ports: - "5009:5009" - devices: - - nvidia.com/gpu=all - environment: - - CUDA_VISIBLE_DEVICES=0 - - HF_HOME=/data - - HF_DATASETS_CACHE=/data - - HF_MODULES_CACHE=/data - - HF_HUB_CACHE=/data command: ["--dtype", "bfloat16", "--usage-stats", "on", "--sharded", "false", "--model-id", "meta-llama/Llama-3.1-8B-Instruct", "--port", "5009", "--cuda-memory-fraction", "0.3"] - deploy: - resources: - reservations: - devices: - - driver: nvidia - # that's the closest analogue to --gpus; provide - # an integer amount of devices or 'all' - count: 1 - # Devices are reserved using a list of capabilities, making - # capabilities the only required field. A device MUST - # satisfy all the requested capabilities for a successful - # reservation. - capabilities: [gpu] runtime: nvidia healthcheck: test: ["CMD", "curl", "-f", "http://text-generation-inference:5009/health"] diff --git a/distributions/together/README.md b/distributions/together/README.md new file mode 100644 index 000000000..227c7a450 --- /dev/null +++ b/distributions/together/README.md @@ -0,0 +1,68 @@ +# Together Distribution + +### Connect to a Llama Stack Together Endpoint +- You may connect to a hosted endpoint `https://llama-stack.together.ai`, serving a Llama Stack distribution + +The `llamastack/distribution-together` distribution consists of the following provider configurations. + + +| **API** | **Inference** | **Agents** | **Memory** | **Safety** | **Telemetry** | +|----------------- |--------------- |---------------- |-------------------------------------------------- |---------------- |---------------- | +| **Provider(s)** | remote::together | meta-reference | remote::weaviate | meta-reference | meta-reference | + + +### Start the Distribution (Single Node CPU) + +> [!NOTE] +> This assumes you have an hosted endpoint at Together with API Key. 
+ +``` +$ cd llama-stack/distribution/together +$ ls +compose.yaml run.yaml +$ docker compose up +``` + +Make sure in you `run.yaml` file, you inference provider is pointing to the correct Together URL server endpoint. E.g. +``` +inference: + - provider_id: together + provider_type: remote::together + config: + url: https://api.together.xyz/v1 + api_key: +``` + +### (Alternative) TGI server + llama stack run (Single Node GPU) + +``` +docker run --network host -it -p 5000:5000 -v ./run.yaml:/root/my-run.yaml --gpus=all llamastack/distribution-together --yaml_config /root/my-run.yaml +``` + +Make sure in you `run.yaml` file, you inference provider is pointing to the correct Together URL server endpoint. E.g. +``` +inference: + - provider_id: together + provider_type: remote::together + config: + url: https://api.together.xyz/v1 + api_key: +``` + +Together distribution comes with weaviate as Memory provider. We also need to configure the remote weaviate API key and URL in `run.yaml` to get memory API. +``` +memory: + - provider_id: meta0 + provider_type: remote::weaviate + config: + weaviate_api_key: + weaviate_cluster_url: +``` + +**Via Conda** + +```bash +llama stack build --config ./build.yaml +# -- modify run.yaml to a valid Together server endpoint +llama stack run ./run.yaml +``` diff --git a/distributions/together/build.yaml b/distributions/together/build.yaml index 67ba2eefa..49eab859d 100644 --- a/distributions/together/build.yaml +++ b/distributions/together/build.yaml @@ -3,8 +3,8 @@ distribution_spec: description: Use Together.ai for running LLM inference providers: inference: remote::together - memory: meta-reference + memory: remote::weaviate safety: remote::together agents: meta-reference telemetry: meta-reference -image_type: conda +image_type: docker diff --git a/distributions/together/compose.yaml b/distributions/together/compose.yaml new file mode 100644 index 000000000..75c96b686 --- /dev/null +++ b/distributions/together/compose.yaml @@ -0,0 +1,18 @@ +services: + llamastack: + image: llamastack/distribution-together + network_mode: "host" + volumes: + - ~/.llama:/root/.llama + # Link to ollama run.yaml file + - ./run.yaml:/root/llamastack-run-together.yaml + ports: + - "5000:5000" + # Hack: wait for ollama server to start before starting docker + entrypoint: bash -c "python -m llama_stack.distribution.server.server --yaml_config /root/llamastack-run-together.yaml" + deploy: + restart_policy: + condition: on-failure + delay: 3s + max_attempts: 5 + window: 60s diff --git a/distributions/together/run.yaml b/distributions/together/run.yaml new file mode 100644 index 000000000..355080f61 --- /dev/null +++ b/distributions/together/run.yaml @@ -0,0 +1,42 @@ +version: '2' +built_at: '2024-10-08T17:40:45.325529' +image_name: local +docker_image: null +conda_env: local +apis: +- shields +- agents +- models +- memory +- memory_banks +- inference +- safety +providers: + inference: + - provider_id: together0 + provider_type: remote::together + config: + url: https://api.together.xyz/v1 + safety: + - provider_id: together0 + provider_type: remote::together + config: + url: https://api.together.xyz/v1 + memory: + - provider_id: meta0 + provider_type: remote::weaviate + config: + weaviate_api_key: + weaviate_cluster_url: + agents: + - provider_id: meta0 + provider_type: meta-reference + config: + persistence_store: + namespace: null + type: sqlite + db_path: ~/.llama/runtime/kvstore.db + telemetry: + - provider_id: meta0 + provider_type: meta-reference + config: {} diff --git 
a/llama_stack/distribution/build_container.sh b/llama_stack/distribution/build_container.sh index 056a7c06c..19f3df1e3 100755 --- a/llama_stack/distribution/build_container.sh +++ b/llama_stack/distribution/build_container.sh @@ -15,7 +15,7 @@ special_pip_deps="$6" set -euo pipefail build_name="$1" -image_name="llamastack-$build_name" +image_name="distribution-$build_name" docker_base=$2 build_file_path=$3 host_build_dir=$4 diff --git a/llama_stack/providers/registry/inference.py b/llama_stack/providers/registry/inference.py index c54cf5939..5a09b6af5 100644 --- a/llama_stack/providers/registry/inference.py +++ b/llama_stack/providers/registry/inference.py @@ -55,7 +55,7 @@ def available_providers() -> List[ProviderSpec]: api=Api.inference, adapter=AdapterSpec( adapter_type="ollama", - pip_packages=["ollama"], + pip_packages=["ollama", "aiohttp"], config_class="llama_stack.providers.adapters.inference.ollama.OllamaImplConfig", module="llama_stack.providers.adapters.inference.ollama", ), From b1451afbc840ac2874d863d7d4a9a72c331c7927 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Mon, 21 Oct 2024 18:21:30 -0700 Subject: [PATCH 35/40] Update README.md --- distributions/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/distributions/README.md b/distributions/README.md index 1802f0c9d..0cb812eb4 100644 --- a/distributions/README.md +++ b/distributions/README.md @@ -6,8 +6,8 @@ A Distribution is where APIs and Providers are assembled together to provide a c ## Quick Start Llama Stack Distributions Guide | **Distribution** | **Llama Stack Docker** | Start This Distribution | **Inference** | **Agents** | **Memory** | **Safety** | **Telemetry** | |:----------------: |:------------------------------------------: |:-----------------------: |:------------------: |:------------------: |:------------------: |:------------------: |:------------------: | -| Meta Reference | llamastack/distribution-meta-reference-gpu | [Guide](./meta-reference-gpu/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | -| Ollama | llamastack/distribution-ollama | [Guide](./ollama/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | -| TGI | llamastack/distribution-tgi | [Guide](./tgi/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | -| Together | llamastack/distribution-together | [Guide](./together/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | -| Fireworks | llamastack/distribution-fireworks | [Guide](./fireworks/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](./meta-reference-gpu/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| Ollama | [llamastack/distribution-ollama](https://hub.docker.com/repository/docker/llamastack/distribution-ollama/general) | [Guide](./ollama/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](./tgi/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | 
:heavy_check_mark: | :heavy_check_mark: | +| Together | [llamastack/distribution-together](https://hub.docker.com/repository/docker/llamastack/distribution-together/general) | [Guide](./together/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| Fireworks | [llamastack/distribution-fireworks](https://hub.docker.com/repository/docker/llamastack/distribution-fireworks/general) | [Guide](./fireworks/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | From a2ff74a68602c1cd2bc0a5f0abaa0b6976639d99 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Mon, 21 Oct 2024 18:52:48 -0700 Subject: [PATCH 36/40] telemetry WARNING->WARN fix --- llama_stack/providers/utils/telemetry/tracing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_stack/providers/utils/telemetry/tracing.py b/llama_stack/providers/utils/telemetry/tracing.py index 9fffc0f99..207064904 100644 --- a/llama_stack/providers/utils/telemetry/tracing.py +++ b/llama_stack/providers/utils/telemetry/tracing.py @@ -152,7 +152,7 @@ def severity(levelname: str) -> LogSeverity: elif levelname == "INFO": return LogSeverity.INFO elif levelname == "WARNING": - return LogSeverity.WARNING + return LogSeverity.WARN elif levelname == "ERROR": return LogSeverity.ERROR elif levelname == "CRITICAL": From dbb5ce43fc0d8e89902f10e2e89d2f0fca1403ef Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Mon, 21 Oct 2024 19:10:01 -0700 Subject: [PATCH 37/40] Bump version to 0.0.43 --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 767f06be8..513642500 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ blobfile fire httpx huggingface-hub -llama-models>=0.0.42 +llama-models>=0.0.43 prompt-toolkit python-dotenv pydantic>=2 diff --git a/setup.py b/setup.py index 466ca655f..df2c2d18e 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ def read_requirements(): setup( name="llama_stack", - version="0.0.42", + version="0.0.43", author="Meta Llama", author_email="llama-oss@meta.com", description="Llama Stack", From e2a5a2e10d64cae4eb46ec8b1045640bb7b8fd5f Mon Sep 17 00:00:00 2001 From: raghotham Date: Tue, 22 Oct 2024 10:15:58 +0530 Subject: [PATCH 38/40] first version of readthedocs (#278) --- .gitignore | 1 + .readthedocs.yaml | 8 +- docs/Makefile | 20 ++ docs/_static/llama-stack-logo.png | Bin 0 -> 71620 bytes docs/_static/llama-stack.png | Bin 0 -> 72643 bytes docs/make.bat | 35 +++ docs/requirements.txt | 3 + docs/source/cli_reference.md | 485 ++++++++++++++++++++++++++++++ docs/source/conf.py | 53 ++++ docs/source/getting_started.md | 430 ++++++++++++++++++++++++++ docs/source/index.md | 40 +++ 11 files changed, 1071 insertions(+), 4 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/_static/llama-stack-logo.png create mode 100644 docs/_static/llama-stack.png create mode 100644 docs/make.bat create mode 100644 docs/requirements.txt create mode 100644 docs/source/cli_reference.md create mode 100644 docs/source/conf.py create mode 100644 docs/source/getting_started.md create mode 100644 docs/source/index.md diff --git a/.gitignore b/.gitignore index d0a5f0056..f6ef5d9ca 100644 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ Package.resolved *.pte *.ipynb_checkpoints* .idea +_build diff --git a/.readthedocs.yaml b/.readthedocs.yaml index f89fc906d..f114dbf9b 100644 --- a/.readthedocs.yaml +++ 
b/.readthedocs.yaml @@ -17,7 +17,7 @@ build: # Build documentation in the "docs/" directory with Sphinx sphinx: - configuration: docs/conf.py + configuration: docs/source/conf.py # Optionally build your docs in additional formats such as PDF and ePub # formats: @@ -27,6 +27,6 @@ sphinx: # Optional but recommended, declare the Python requirements required # to build your documentation # See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html -# python: -# install: -# - requirements: docs/requirements.txt +python: + install: + - requirements: docs/requirements.txt diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 000000000..92dd33a1a --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/_static/llama-stack-logo.png b/docs/_static/llama-stack-logo.png new file mode 100644 index 0000000000000000000000000000000000000000..1899a0fc7f4a814f358e54ed3539fc29c681216b GIT binary patch literal 71620 zcmeFZ_ghnG)HWK*hy}q6gMc)JK?Iayp%)89Q9yccBBJybngp;xq|OXMP(qhZKstmf zAShLkA|+HI2+~V{P`(v>&$+(8;QVl|cdpA3ce3|>%6jf~uY0Y<>-(B&Cy$>wjzAzz z-np%;gFyT-Lj7~}2waiAcJCqlbeC*P+w~&k#*$dZ^j2zF#Qo~MP zy32Q4FO9%C=FJqjk1Q{Eaw;&u96c{qx3_5uuY~_vwv+l#9{yDS*U=Fg>QBkE&*ToT z=$&awV!XOc&6-BkBAEM zOe*quO_xPs@Mn48Tbj4v@G7VaT}nOioejB?VJ))6|A5mRQRnoXdj9Xdfm1K6E@ZCS zbeRx@=0F)147l?m<7a8NG&;BQZLh$0#909dvd*TLgo|1Q4wpsSJv}-mB}38}Hh>SU zMgTvSZwjRwE9bQ?=(kx2Z=XWWV=4;gsA!U?1#j=td%ii(K4!m3@?k>`$dJ(Us?-}V z!i}!UMKp7$65{T9FZ~i+bUh33BtLP_phQV!nF=~u07ms}@3Er%lfrzpF2i4}M%0=# z;#R1iB?-7P1a;jOx|BA2eLdAl@WuK$J_8w&)9=UMrjF~jabImD#%0Ls^6@%lw(7A` zXL`6XmUoXr6z zxwkKeF-GhX0FU*RJ5+4FE@Jz#UGn8ZX>}w9E%TZRv36?XO!da{Px7=qSZ5Nx=o&AFv2^x4w@y znWj*|nsk%;#8YKb0-msC;}-#isQ*0w za^cpRjrx;5!ZCQvY@=lWWud%Z0dXBA(7RWO;`Q$ybMLJBJyQ*s>3Kn_r3IERz9=9> zVSGD%#|TKcG3(Q94#atZbWaZCaI@2LNf_pPi8C#HVE<dk}l1!+Z#(Cr8;e#MjQ3H37wlHV<=vC(z0V5^O z)UR7mm)OW$Me6y>pT5yLCco#Gco(kDV*3^hBZiYL2k@MS_fsdWYG{q3^cqit8P?Cj zIN4cqhr3!)^Hi5VF0)^bqC+g-Ym-juTUe@Fb#c+`SCMnkQ4hndm7)0IE3QXj(`-D5 zglCEV7>tIpBmV<9)=H?5qM^W**-b)H|2q>EvI1;x&__y}OWj^SfdL_XTm1@6Pat0JD9`|R^zO=o2$!Pm0C9}&kCbXxuSCj-|jx*tGwGYxfPRVLbY;)Rv~P}N0Inv`==dlEK`s=`~l@C+WMFap2L2Tx|kmv;bTTo$H4nL zJK6BO5B5arOZ+41xq~E(N}R{v?#uLk*t^lWEzCDsx;XXr^zZ2xAYnTn+lGHunO97l zh{dgG%%5lSWJQ+a<9&h$gwop%k4Jqhb(0Ay^G6aVZiPSob*FP%ln}5q?6rC^?2uyp zUth4+zxk{Cv&<6TWQH4OSK;H|<9H3@NQp0Z9=~4q14n)MYup;jGk2}UY7{UYf!O*J zX7NIQ232QIYLrgi$b5IR2hVB+TU8({F@lg8uB3N7+fj z@tI8My!QX=bv~6&^GE!O9EkNSqQCe0@KLeDrOwHM!@t*y{TM9hdu&fCikaa%^?FtR zNqK?vt1d+9sQuLekj1$H5lM zuljz$g?eSMU+K4(C1-L*L`^oS;C(+H_Hp#s5EW!R0JW2)o_oRNaU6UzOYf(j4JL)PDin z+A@b}nBTYJ*F$8?5lB0L*=tvh$C50!C{in5D6ju)8f#8FrARm%Fs$o>KnM!L@Fv`s zb1^=|G8V+F)?oBQ-{-f?Rhs7LTR5I?U#Z(iRif9H*#s~Lr+8a>ez|wXpXs1M;NI19 zkKc>=YRzhXVL8WE4B^U`E8UU!w~Ma&fZgS`>VEQ?er3&TQU5W$oD&FFor7<1R5eA? 
[remaining git binary patch data for docs/_static/llama-stack-logo.png truncated]

#9!>tOniSDP*0sPwR~6a@p&@1 zbOzV^ol@)^@AZ3mCwOK&j`J~D?SOdaM-OO${_myyxvGs= z(H%|l`Hh6mSN0$$#H_=(-B`HYunTIk{-+?R_{p+_JseZ^MfDc+oH?;()9VSeaA^#-o6e`O_iL$JFV zwc!I?L?q1QPI!qVzA3k!xGl1f^m~V~Z!R|$omETE+GAU( zl|I`&%O;gmGbeAWUgLY`;PPyz$+eeH@up6B887AA{HU-0zzN2{wHqWp@VHr?Er*m^ z9=LDsPrQLk{Rai1SR;!Y6t;=eU3zzB4-+wPuCS$CgS>xmBLq;5?c8FJt@NHlH96D{ z;*ucY+#-L%zRyBKt`C1v?=a(85cay`n|qwQZmo-@e;E1i4IG53!r`>I*#J1s)iY!J zA+pujEh8=8cF!%yYBWYx&7pliYJN4;h8@~_-LHMt^=As4RQnQfa0%5aSXim%TG3tx z6K5>j^cT3z=!GOf*0VtrxdOq(wsz|`9_nd`zqnQT*0vK~>ADBrJgE`(dAH%!;7CZ6 zaQER)fy*|RV5bUEN_-pmYD8y@bnFePSeJMA#7s#A@AcwgTZFZMx`<|wvc@*C==VYc zClF_S5FGs^E4s}GUUjar>`pufn{SY7aHJ>ay<3l*Q*8bNBN8&k%#AV>JfZYgtL0H2 z0jA&-Z4vQW(HV2x`|kpppU+rZrU&>(_+~4}kUFx!zXr9k+;N9?#=-r-(rlI_Y1q3y zQ=&XHYp=@(=^Z~5A1xCXPvcJXSAFNIimNNh;7Bp zHH9z64uvmQ{PJb~&KcrlaG_{Lp_@~z@lR!hpvTPbX=+>KjU1x`-;AHGl?0P%a~4|iD~;a^q~ugTOe%|P*0 z@4735Mlx^uo-OTD3wyyFO?@4`v0$Y4dmSfxHX|yJB!5aj-11Yqq4~M0xuBn(Df1&I z8}t5h>g%Yw%&56NxLvU8oGl2HFz<=o30Wq=JI{2=_tz79lHjVMGT++KN{$qgS zKwd}zq!Dgkex>fihv093McrfX+B0W6|M+9F`I362Y~m3C)oWf~=jLq2m6e^ImO6KB zCKV$8_33%+CLiP|^q4t`YJCH7m=_c)iy9JPz5rXbz=jlc11o$*IWZLe}>^!;Pz+1C7b&b<}(Z zZmx+HfNgsFodaz_CwC8 zgk9b-e2v0tqWdqrx?W(-x zg5EA-IQzy$`5un-YOVsd99~=;j7^ouphH2A8FgYSvc%e*{TIMQw)$`jtq5dQVS!rz z>Q$;+Xcasg8DtK2=cBOa1LCx;`PEc;oq~j0!wu_WsyypF;-V&#oObm@mo8R~N;-j^ zC3&F>-7HY71cK_G96oZKDKnvK%R4z+gLVz#hhhz{^zPUwvl{5XZv!#yMXXyMj9;>Ty&6-aHB2o z@VN4zSBRg&?=JuEoxgWlT}pFdx)bJB=9T+nIo7)Bm`e%WTtv}51BUCU0I0g|b2iX1 z3A>STs6D{H@UhcFPfPwxtJSUqh2L_YZV@KvNwSnTX;URoB>-T3ja5xWF!33fGhnkLN;#|&2Xxaw?svv8QGK!siBwIt-j5-viCS#2RQJ$mmQ zu>v)A_2+=~p!To<<6enONKA^d{9&$s>D%s)#+s(IgcZsYXSar5=U2-)MtX)~C>$f0 zUOs&T(ao#b!y{8J8!%15X2qjBZ2(a5sPy zP5OAAtID`kjq4R&JzJ&MhU1tnSAj^@U;eZwJ)td&@+&{9VU}r2eW6$Dj1tm6BkJ>(*)@51J`78_zo?YeDSzQx` zt)`mh%qTlhYm>-F?k_nwZf#gIy0d`bk_F5AgiP{RG?|C(R@>^zq)en!_xRKv6Vto>YMii0XMLQQ-q3bPD2XWaa-DE-syw!~QXJ`)@S%bZ(% z*A8ap_Of(b%s2LG2CKIYQJY;fMV+Qvlgn90Yp_T5`+1V@C^0sv-ydHNr=FatXAYj0 zbh^noz1oFpPRGj6l(o2)>SGampu}QBE!@Um|4?IUlzbUvFdO8Bky*mtOTrZdP>O)V zw=Lm@ZYU_Wvc2c}n4`MG?%(<#RTVQ`$)9E}qe6J{z59r4N3k<-7?MjMC}{ivTrDI; zi@CHQ{JWbGyP6?(tx%-E?_2*L>bgF^KCSuXf%dlQ0glBVIT^d+(_))p*2vUXPsoA^1WjypOwo_OAl3~U?xGInT+ybFK zq345aAn%3f;Np&9$LV#sv63cun<=IA84nhdCmmfD!cKdC)n{2EEVwwukcn^?BqS#M zjh`=QNJ)ObjFUIg*E3?Od~(NImxeb8-jodsE=~LU9A-0vlpxd)#;_z*1$|-5DVpGq zg$=V^#)>Q15Ouoo6N?8y2IMD?){jESBo&LfR>F7yIg5EeSt57HH%6;~+bXm?8b3KG zf#=2FX}M|%0tYCCr{h&k2~x8YxD}S4Qz$6-5XV#PO!k2TFmSNu_*xM=0WkY{$%q{d zXjVXp6%=F)Z{9GRb2A$9^%@ga?Cw}j+MiwhOfGZyFuPaz(PVkwdU48u#N69c)t=r; z+&d>Gq?RsuOi(D5=HA|p?k#@jP@#&-_cgXE5H1v4xGEdGty_?2MC;n@j&WgkkCEoV zc}`+LW+xT!Mi+t!)E3-gS21~t`5H?rLC#RzABy_A-{iO2 z4H^)ua^kFr9RLsrveGxErkPy|?NWNQx|;p4eQ-wh$E;D!Si(iqqCC+e->bKF0IC*S zp`ZEbHkV&+P>8TEO|NS=CFOVZxnm9?DFyLM_*TzRGffql5e9iF#6A>I$PCvUtK594 zrKh(bae1#u|DZH+tTyz1t@82p@qJs!zi#fJ2CN)mO?@!l#5aA3Q7_GbzdbA=Avln;c4Bp$cKs-X10i}E8M?Ik3*27s%tf1Fg_HLf&9&ril(sE? 
z-`1?}Umm1*sFhP{VsmWm2$f`EXzr_`5f0r${vz0uNUL!!Z}re(BYodDfd&)24!rS+E>uKoO*Ie}44 znfX?_z>Hg}IFMUn2naYE++zkr6e#7nv^S5mP7E2btg18l6+vbWvQUE!UT=H3_lf{} z|1l;JVb9Ej-#p`Ikk~~nD;>lyC!a4WUr@lfV3-i^;WtIMS~mXPmcRMe_t!w=qfP1Ns-_-s{;N{0@XOy`Y}!Z+xqlKK+2$2yj^p#qLpvN}XN18CH?Reh-}9xu>SVrqR% zVj3c^^h8E)q}|pR5$89z-|Ilx9wKnx#P;o`+-%6m3#yhT-G*}1T=&56h@b-tb3~51sAT~cs4ZwoD56?9rlo={XtRp{wz)*@c1k4KqR6x4ofMQ@i zr*6Bjt{%UsbGUtgjGt)b+|oJxQ3J>Gc}p*(oJNk}6K{fp8KY}0&q4_Mia7}pu|z*c zaXt4q{dKx0jwMfuDm!Hm=cWWUjK+}E#>aq@9ue~p(g1be|SvI!281x6w)u|0M(mDH*Y1l!bR-NrCQVG zMi+<;7P3VVM^IsQ2)|QL#5@TCO^!XY{i^-wha)cJ3zRaIbG+DprW9hQ@x$-7q+Elf z>G;dyn4UT)N<5sAq{ohEz8a(VAc5{MQEIcMw$u^+7c3uPSODQG_K%9TnHi>SAUy^{ zg9{$- z;&^?&^WlUPR(*$~s_tw%6mo!yz*r0zwuCn2TwotW6_Zs1pf*$5^;Hk`l8g=(QluA` zOpZg(ri22hd`o)L$(a`=GbSs;CVKj=hLV((;j)-I0ElOg|;!`v5 z0W!F(gN-+Q5v-Tp9q@UK)30G`{yRSM+cM_&Jqmj1%U6CkualI9E&*o9f_{-lC)aWj zg2BWRbGN?lWYDX>?h%{?X(CBw*Z@>ykIO@xOG9{9?ox|NXjoYB2jbSaM89vefVLp8 z{+{IRf`OZ4Kz~ftL-e6ITxO(#bv>cbz);9494C?I&9o}Z<1bn>t6t=UQolOGN$!Gy zRS@n64s2`++Jj)+MeTvWXh4AFze|dH7Y) z`tRB?kBRCmqx5aN3bNAnwBha#i<36_5sGwjWmSo8Ip27`t#!>pdH1^Ks$E+R^x%5`kS6 z12Z3cgnfN#(z^Lq=TphSKBbTS9RbbH$h{{49ELbRYHz0ZpD>A6vGD>}@SpHLpT%_d zZP?WQL>$0GaJU}orNnT;U`lU8X(#_vI4TgoWwb4>2 z)ZyK`07)}~2ksc1EUBI2s!{2NK*EWgG{MW(jk{T)_lqWHi7$=v2aYTiTNO+xmIHWK z^c@OkwduNq&O*;5|L?&EF0ID2EXwkyKVRMd!1BS?$ttR>vW0b4kU##{w|e~zrGuh@ zp583|$7v%(f2YJnQ4}D_QUP^Fl*~p~h8q*ub1*?Gs6P)bsHc|2VtGdobt^<>U;b^G zzDPE8O4uSUmwbU)N&Ntexq7AadT}*30RjD}-$_$k;?{ji#OhuprJ3vVvXl}hIAkUs zzBgJ`PNeVy1}!9$lTzUC4GN(yA>FD9b;y)GDmDTEJ-VG9Oti1 zo|d=jd;Qh_>E!o|mDSa;^Wzf2IErb_<91zd?}k-m9~i;>>!FR08FGZ??0r`FGpZByhSgXGnzd6{0 zr$;+T_t*R{oAECrv$pc{ZYdrL6r*24T;ZWDy6Sa7SJrdMg35Tn+}$6dBRSRAhL6bt zScQ4u@|{1|e%BdA6fg6hoYATI5^Ho}-e=RBV~vn~plAvqdmymPr$x3dYg>>Tz64px z^rw1qal5%S2r8Hz6x@_i@oqo@YB2~zFXpG833;Ky#7<-+RiewL!}`06Drrsqt$|?* zk=vTn?JO_W_qHXF|0p`#;#M-JtirQ4``U=UNjQV3G&J;YURTo!)arC zXI7ag39WqCR?-3&3!hkr@cgR4dn+;II(OB~-<+hpNAIzr_rOg~;Xd>=0QTYF;X96r5CJ|wRBquGKHyc*J6 zeRsOokqSY<2)%m%VYajZxG6*da{kctQ}y;-{r?`^^9ZSZwxeYAZWAA-`5VQ`lT4uj z3n3>T6#OZ`La%h3Oi|#2oQeidHk5(eRW({t>3I86uvvqrWmW3Z(S5x~os#onYd7{` zq)h8oa@PkQ2$N>kYI%nHNZtT9xLaj;ygR9e!9C@wl3nxTJsJJ!ohqH&d z^#(1y3)r1MSjmVI+sy*JmBVCi9T6^no>%|P287hg-eoRbE&XFfvPDlWnz_1=swY`C%= zw=)t}b&XzOKLTpJUcHwumy$7<*k0*8V6&;*xt$ejMF$fkPV*+EeP4F|rM#Q{`lQ3) z)vKUZr_S~=?_rfD9p5P5;79#5>xg4m%7?~o<2yjiP!vo**lO~l{()~K-`!7`mh9?b z-t*dV{g~J7BETHSFES`$Heho^q_BC9?<|zzvQ?_eOkb4G>Y|>khjrj8&#t-6UR~ds zZz`b$9>auV^EUa5xE?L$KEca2>$hTRQ^-`uuBE3%EJ9p8`yEcUE!M~Kz$Q?jKu4&l zPdf_Oq&V~40w-hg{lisQxyv9x`NA?6+p4z6C?rY_UfF`N+yh<{0Tm)VD+2Ll1}L&X z;p%`QRRiJUD-abqdR7G*O&PyCuhuY6dl;ftHEg5~eoe^D=t&H1v#sO6j==dEkpTfy zuOZMG^S;dYmG!E!dciw9?A@ev2OOyPZwu}4WfdLy%|>C>{YtNWc%}I`n@6iBGqw5` z<6ZT!n7znk0Mi-^3FGLCu5z3j#N!4O;Re#Lq_lfwrD@vMK)G!=K6gsanP)Pk?zxP(i(+Hf+nuf=zlCB8o07f#<<}z*yb}lmJf9 zPC0k<0mE%Bg+EXx5h{3J@7_jlz=Pd>TNldj#^okpXCtF(oF}v1HndsEA*n_S;;t#u zzm_`-xv^mA#fQ!1z_d!5)V`NN-4@bW(47HulSySU$tG+QcyC@gnH%s)LHd*G+`dxz z3gxi!?D>pJqs>WWkbwS)tP~hMA429Mgo8nMrX6xXByR$-w;w?(lzh~@&+1G~Rm(|_ zGWBs*QVz1NenGt3+Is3X%nMaX)tY~2b$PIZxY|VAwowd;@SmeW{fgBC$Q6(a>;aY&+4-AELrgwO%4O_6TY}VB;Ln z7SR88A2CE9zH(;|juHvA*Zepw-Y(cZ+cAWwXy)u7N2@J3E^^K&yb^ zScz(bClnI^Gr4)gz)?`ikZ}iPQVoh^vBvRhZ@=Ta^tiH|_kBuwhh6xt8WpP#qXLZEJR%panT0S2MpQP8K!Vug1bThEQsL+zLlGV;%@ z^SGA$U<^FcT2QOO!gr`vS-le4S2k~uzbH_~@JJlexcp-;&VHo!XmJvqp^}a&eobjH zVhG@P>>1~sB10hE?m%?SCcwo{G9h)HpvKNwAzO_fQ>8Frj%RDQa~## zMV%))L5_<%7y!`kio0ghQ`-UAh?lTRT__t?6O)G)XYkHzC?yS56Y^>IwIRvEOVAFh zLd$|-65uC(>@;vXWf0JB854+PAlt5uaucePIQOi7fx0d_RfYx4{AvhETWZF5$eo}i{ 
zChprCrUEkKk+;pbD(ce9u9Btc_@#xUnTfeHh!})h0pt~9b0zw=cV>pP;C7Kgz?*yioomyNDu2l=QLik?;o-QeCVjsT5-YzkpRd znyF)Gv$RV2I1LQ?ZywN4$tH0jH7so657oG$u_7YM>;Vnc9iks$nCdaWZFuvEi;z*m6XgZ#hk)T_hJJpA|N(NLqF)s;FGkdO>#XmUC$eB*jt&A}** zBJl*8{A)KCfgYV%)MpZ@6x8iB4r)AtEx{>JUFiV6sz>5MC!yewK*g+yxED9un`0<+ zlL1vV$C`|}UrDpyE4x&5-pa_yxXkxe6Q30Z(_u$C%u1)GX4V#(6?T1zWA0(tz4*7; zvJyD_z9_(6UWK%j=Ae=O5GV$6F@ps%6yflIjGo=}T-5(7d=hkV_wF~xRMkL5Lnpsb zSg*BQ0Rx<%(RLyuI=0{kO(-`GNE)hX;V3=Bl8C-nV5>JYWD zgSJ`DaJ?;jWk3Xb0*wf0A$*kl8|0R5GcM?>9>4eorjYU$bofEx&gQo^pMQS?pz*YbL}e(E5l!9 zHzFv~$$@4!ZoQfw?n4re$A(|&8Z!LtjqMF)2fFLqheQAwdGJH<+A7JDxC0|R`Pk$j z^N9=a4``?cF&Cgp-UmiNp1LE%GyQ2(@*spmY)?}SoR*-nPJ<=HmgVp70b|tb>e#md z-BRfKhqB6GeT{507Dba3+L!n3lSQ9JP5I!pEn!8pzI<8&QzmqcLx1O4`#T#1C`Js~ zy@LvJyU?H6;m`HZK=ZeUhC2}0?9=y zX@e}v3}78_wL7#4&(Buicr10~Sb{CaiY%!V$j|r%9frxgVY5F1B8;hJQ0RliMOS#3 zXtWh|MKlqjyO{}Z&|F*S!q^nf{D=mG4I#piHJ;l8@F;UQvJVN$x{|nZs3$~viVk*& z;e>UO_c(yFtQzO6g9*YG4#$Jgf$qly&kNli!))&?YpAon$tF&5we zS;r)gfaIA$krH?-_o3&kyCGXf+D#^;+RRKvfH8rpU=UXwYh2#9 z_UEQ<>VOu)#1hL|Y$7EIsEI!&uHWKx(uK zbXwhD0jU;|lrEwmE00jQ58(H{b|5>cT~>cX1DsT3 zCAcz-KcK59D>2aU{tY~HaMsdS*@ zU%Uur^#Jw;y}CX&1vaqDMyL&!?nMvJ2W~oOLaP%}Ldw+zRn@?_pwOo5zFvUwxL*Q$r~xpbH+|wj8BBl;mQc-RLq zAt1=~I830`=wHGwDu0hnfdIM)K_+b5@wKQZjH?I|Ygwlt)&Yz*RYx?e^pq~>N+hE; z09{m$_syyx&wm_3raU2oh0F|QDE!J!hxO6sNI?r>u_mT)x)Uy!cM=o*r8$=F_U)T2U)uZq#SJax!tdsqJ{|Z@3bpMPP zoe~?l_$+B8#;ssL$*X|LIQ{@l7n;7(zYAzLL|UT!e27@4%NZmp!WVK?k*iapi2 z(EL1f_?@Ot!V|>$I$GXjSl-t^o}Y)d=ibveb4*BZWhDWW2Z}c#dvgl)>q5){Gu&k(SF#`Y%m^|2uWENlr+7xR$Y%l#g zkmg6QLCe~Xc7;p-|7byHJ^2EXqCE7KH68I79Aj&2;Du&#E@d-_w6*WStrG%8s#3%M z4d;#gR|E~ZK065y-u{I2Yk=Bk4pb!s-d=k_}aOtH} zj+ly(9(Y4HZj>vI$wTT0IdF|l&`O4`rXv75U$eMKQovt&O^UxmAx8*ZanQMf#so11 zs@6mnjZgX;;sT2y{}?F_Gy(w$**F*!a6p3S(%ao7!41-72(~l2a#ho-c3=eN@2e_a zvu-{4MR>e7rJ%P6cWC?A)R{^{tE6=@91$5L+=iq>lIC|)H=i`wo^tQ(3z@A(9}gTF zKSrwXH=bhJ&%m!ln$3CnlD#@>w)ELSmjSX;GXoY^dk`$w9XFsv5BjI4+0TkSoxE^J z<_WqEVsV~nFV;Vm*$fyBXp)O);Gl$m1EZPF`4w|EbN@fX1OheV-yLy;%~il0AelF^ zW6!?{k+aZ1oOZH-tmwBZ*S7OP(Nvh6{4Su5{;S5o8W|%L+Ak^#<|2mZ)0gfXP!6W^ z6T?1)pdmteX9_K&J8W^A;6r<4?!#M6wmWGUAz3Ve(eTLH!&mIK^I;f~PE#lh{qN(~ z-8td!dMD5XI6W-2J2fdORy*NO81`S?9!1h2SRRbL6Mc??xL7EenOBPz&$q}bYHct{3q!26I10~x zRD@?^?jd$Xq1nHnvsSfsYXW15(5nL*unr_AC3f6=aq+Hn5sMq#*vw$zDuB470o=K* z%Lcz4lHt+vL!+AgR{S?jL&O};L@mX!c(;G-&V3dc6fg6aV)SXltp8EmbiM+l#{tBa zl!}DO2r(9zpuNU~hmYWY5B}e;nKx&{CKX86c)Oi8`42?&a4>xh1FNUu9W=U~&m1fVAk zMpWfP^!Q!SWJ;md)IIw9O)ijsD?S*0L=_kzU%5{as?@Ko%gfvBAh-MAGp%=lxo1u7 zZrE1wzJ>#dP*!`zx42S=8{45mx{eW4c}4p9-&1Q(bWc(e6_J1`KmYJDXQ5J*+68!IfZ7UvhFxOVgJwDlbPj zQb&qN9R=t4v%38IxvmPx5=Cr!MIY7-4f`-sH(m`sj$MVFcJ>?_V$)P@h=5AG42DVD z+MOPphOci!!Zm!;tZ?nQXmsv}ALiJSy&=GT)n~#i{*vUSjD}$Ji^1!R# z&P+`TXb}^N7EgjeS2NhH@mwZCohc>79~RX;0lj=3z3d!u6SkOYcw~oaOEgSD95|`k zgUC#d(qfWt`n%F`+OVhcX=P}jwYT@1@9)!~$dkH`Oe1ZEYu*oI^T_aM`T_S53q~YE zSY6^w*EWTdo$N~-$E$?+L)se>j}bBu7hA1?0M)TI0+@6Y7-iRnGz*L=5Atw{90fZXR#U$S(-?Carq2t($#yU{cj7g#< zIk=C>Q#-}6E0575^zjU4}hjpJupmYKG`GMkRj7R)i_*8Ep7H3N5ym=c&G|fNVlJ`#> z{7$uhFlkoUtvSrP=g^vl$SyrACt%PcclwYU3_-Y57ta{O3|w?T)W80gkZmq1d_QSz zgNW8SB+>NOD0pb;voy!{iUDgndHhh9NB;9iw6b8}E}VG6$0Li3%QL=tICbN9@U6pM zS3Cfrw`|$q{bt3SWa-an2r972$6$KQ4DYAyfu&cW--W$wy-c>9O8+6}A+aXBAxLg? 
zmc_e0B5+JiI%j~Ol@lE+AsTBNgInfY{^>JB#%m(L48s1J8Lf<#y@o)5>WzqwQ?PKn z;wOh$|qLfsJun;Tv()=Q|o${|m3bAP(Y6 zuyS)fd!3`N<+T^KNe$G@@i3tRkuFP*y~efh`X2wtpwH~7eix!TBo)^aRu%@ULj;<+ zVXwaA&s*B{N>E0i2hR^|!?0y*yYT02mx;?~FKP>VSZXjaG^qd=R2$#gZ>yQ9sE-H* znn{u3>K#6TTeJgk73;8uCy3(x990@FftRw(;C78&tq_fTVIDB6jIDz;Rn>nx9Gn>O zRN-@8WPJis5^-I14tpLQA8~QC3!rYt79PSE780J~IQZfjl_NS1(p7|_%;yk!A3czX1#SzqxPKW)fd!dzDa8MqE5XA8<1Kb=;)@BWc}Eo!y0? zk~a}(mw_-vQC(-Tdp5WpCVpL5O;z2^=kn!(gpC-E1fyRM>Q;+YZdgeu*j#3mlF&Gnt50q8qhw^Tz;=Phb7~}L1wG6}So^6?#nFB!sQm)p0&hzT>*7>EtK)&)Dh^@;iD9y`% zu8CA9ECJz`WX?mx?&~hew4Nj1b#{Mf|cO z!+hrhLHkR$HJmE|0fN<^)B>_RFTT6z)#brCHZav*1nLGR7+!NrJE~~0s)uvLVtA#k z4rzp+LBMk9u*2*7)F~;=Du*t#*u>;~m(P2H-k@gTdV0gMm)m+M#pT1-hLs08`A8zL zS@x(6uNyy&3*NnaqtBE0bnMf0?#C^lK;yzeyEvH_=nFt(?r9I$EAq@BW}e#f--GW}f{;Ym=*`(Y zVvd<+Je`_{!J`i0Z;yGzmQHOsKfE#_X5TRI0?Z*b?7wX=%9Z^bdFD~ixm8T^$KHOE z?rVHLq;z=yvaQ_<>K+&{#geJ^R;u^pl+keKa#HhtN}gm+E#lTm#4S@e`kK8wBt%y_ zAH#@t6rMzYc~w0kIuawrzm;Y$cNj_QyUso)8W1g0FY0%%lfVB@d7eFCcHnE+@XYM+ zv#&LE4r0S0nk|(JBebvZaWK0rqV64Tef2Hmm}m0H)uIX!hiyt0N5>i)9|7+YI>BnLwYYx(}bNKRGz^EF+RHTW3hhqwm!2RG#)R zli|lU*lmtOwpiStVy?~*wffv_B6Ix7!TtpE93#)C4s3Lpl0aKt0?wGpo}Oy=B~89! zpVjHmXwWNnnLV&;Sw|+6wg#W`I~Mj;T=+C+;a5eU%w5E%Q&;c&K^W#P9lwObtv%Vx z7Rl+)=e3DG0^g<=H$;P_-sFQegwvKm73S>d&feB(o+Q3;Irn8B@u|;sFMqp($XyFR zvfUu%67zSXJd5JftD&^9UrWdDmsrbhB;7z-IuS8g7bGvR8{b13J7y#|b3&Pt{?@2- z3)<;c!wcStG`?Wsn2o_9gO;W`ijdEkOtcgiBl!Rt`{eLCCw|@p)-j$EtG*QPR`;&T zvA!K9q9%29@%Lf1ri{$?%7jn(P5LfO9q8-9_x=}Y&wnx8p^-%0{pH`n&NnHpQKF2`6 zMVbkXK3v-QYCye!rmRpc@RN30+p=-u>W1Q%Q=DJXzMwq2<-VHel=<{ojj*P$`LM`pqW z0Qko-EcZV#MG`aZ+w(8MN32(s6}1RGQIY$4kXb?6(n6{=TRzk^_U(DL`NHt_Z9gug@;UYwYiEwzgvo8ZIn`FD3Cg$<%x;2e zq<(>OSoO&B)Ec!n3s>0bc4_*ga=N)gUhat94&H&C$kO})CbViAeBUWH*Ku~h;C$7y zJBGxol0|SRG;MrO#jZZr=qMYYogQ2a9pOppW>(sZ4)a_bPa5vf>0J`PJnJ@}&(U@C#s3W1}c)Oesa3`fbs8~NijaTr`H|aA) zjK|S2!yp~&)Ob0p&fv~@VkZ5P^^?>SKxNrVbe+}4`5U}lRkloU&tJ;u z_196;^yi*B`Z~xUo-&z%(IY3l@BIP!0bTBoZgo;g%?!a$2Sp>3&l15Li%TFH=8t7` zC4ZC(J{rJ^?b!(WJ|;3!Hx!6G(hjP5*k#y49j1h-`s;x$KFr)T)~pab9xUP!7YllW zyaacgV3%JsI3L}AxzLZ|>rM%btv}s3VG#_%^t*@bcJpDRd0;DvJF?>f6vzn{JOjq~ zLv*lVkzQl89O`ymIFKfJFGWSrQ4tEdzf+C|LC8zv@VbJWZu|gEOBd%S2Ix`b= z--h?y3#LTlXZ$AkOr>phU{t7j^l0xUf_R>av&Rw8tT1_`2joMtYL@W|ks3_?p-!@a z>U6f;Dl}5)fNQ`Gk>#;fUZB(Y>l64sl7EU6Of_z7*tOo`^b0k$K`1u@$?IsT!KQmjV-LLe(4 z#qRkP?$gBKMVogaMT?Ir(Dh{Zvp(LqW7=+>of+C}&f&o0Zi0)W% zvTGXUY_vO$5J(>}N?43QLuA_p0s&V1e0yX-9l*bo&w#t#xUhJ^JnClo;|5m(eS*lw zkFw3z8|v)=tVcTMG3)ql86J(R@WOwjJ&M?MIXG?p_vk`UJbV7P#eg?g1zjP{kr0Kj=n5NkLGVYAIT53ul#kp3{S3+EN4LVdEdInW>RbRLlAfOb1n$zf}vk(Ym0L4uVFdYsSx@hKJ z=rTDml1GX6*A_Rci{AsgU;UHuB@Oc12ssA$L2&~yhQAJfhkbA=;fW^ik{c$z~0g}KU zMLRx#0?}|7K~~$KvK^QJ7#{V>5nOIi!?}-=u-bRf>2~~P;WdePMqHgDd>a-BUdXLO zbj;Gq9Wv5s7IDjJPJi6*t^R zB8pUNG7rpy+|bkByjPKW7=D4r;GeiOI0a6??dCqh?Oj|%o6rr_g@}-25J$!99RDNT z%Gb*`u|HuEKs2;c@(7ELaRO8kU`|ynv`fYxX#<9UX}2Jl4)!cx9&$OL`F%yl(lAH@ zLML&pz$}fw1c002?8lczPlH6@1CjvVk#Tei;Pp&$eCggI-k5jZNk#D4D1;*-S}TAu zx@W{i5gh_|{~^jvt_A1^I;8G@#ZFC|^Dn30oq1#nV`DMsey-;RST-WL8j zFjLw3_lh9e>#AtGb~K~1rqaeP_q+`6J=aA5d} zJ+Q9H-~}uLtm&ONw87|{1IZxSgDxq#CldJwxMz5Tu6?&#@dBO;)7S=!ZRo-cXTLp& zDEUY`Y`3h(tWE4No4;Oy29X4J>EF&O_s_gu;R6WQWj9QXns0<7yhb0yS=$ZJ?|N5{ z1cCOMrDDfhW|9dKXpy(VBJ+y8z<-CmfqyzvSg`yF561wofgD%`+(xMHnuJvsqiq1Q z3=so@rW)!F(~+Gc^>sqe4}-=tRA-_-18sAt7oD(c6zJjz{IKr4R&>UaS%lz=M2}e` z&=0D|(QIym0B81pTJ~;PZJ&kJi~{OmBR`Ma8rt}F93;FEG$TfpMN8ES%HHfMEG}|J z0Qd>d!68dxfaN%jPKBs54O+C*7MuHPs{Q+}0=MDk5%xgtQ zV&fQ)Jpg(M902H?4weJGY)l8ni>LlO3J0}tM29*u(wuLR&&Zz zmjL!di5=JjSS=M|->pG=R3lJ3&IkUVDMES>&LbkJPP67J(Y)qYRMrjJ-xGo`09y?^ 
zTMnzwc*!4OwoO}e;1#P^XVQ?Z--B#E)%xzzr{Fdmz`zd12x*DKGjd=bd_bq%HDsG& z^~iOA8PGu7J^moW0PU(HNbwNt>R^`iFF!pX6MYJqNCYoH27%*i4{R)4Gy|9?nDNrV z%+h372Lvtu{Q^4v2}7VLd8zm5mIa0x+S(ato51j(gJF1NX<+rHI-Qz+@=Z#L@VY98 zJzuYweASz>HNObm(7=xDFO4+YSS{ar@$YJlrZB+Qg%`1>)U!slA{yw?A!NQM)Avo01nf42d`32r=bV>P2y& z6F_>b1NmHBl#rBJfa%Z?0S4TsK!hQy@*rCq91BdHqK(n(!E8qCZ<6$|)FFm|?YL#o zGXbW6yP$Z`osD;Z>on`>4Qse0ZMQlTNgt>U9+%a93d^1Q`OG|DsOX?)hQ4qlK%Rdk zi$;9HN{AIRRMv@!)`;2<+ig?Wla_8)ov^oK!`B z2h+m6@fH{WKqa!qJB;P)=qMbM<4?Nan|cV4)~fkQP5@b+GhHb8;$E|y9;u7;1;N7f zUCBgj0=QL_hGEpT-Q@%Z7U>0iR8MjM!QU>d!Xw+xC5YP_&^<-9%b!krc9KDTT3wEJdM`k*qO{B}=(WDNDm-4P|Ue?n1>_DiuxE zp=24fh_Q^a4aV|2uhIQH&-e5C{aNO{UGM8Uuk$>P<2>GQ;G+D*9@g5YO%SS-_tN{8 zHs1ff0;b`Ns?xTjE*#d5^ZU%MPVLGT7x;_xOPeYRdZQn+x3K}aw%kweae(l{;UhV5 zFfHdM1ntjZC6*~ZsIpiw;tPwy)K<2)a`Z$`gpj=|+RGq&4sS)K769qZ8N=KTlx9JurjTM>9xHH}TfhQ9H)&(lUw{^?weUN;4wo+6DM;(x ziZMj*XGQWEb3wSNKv!zW7SZok?XrYI*!)EIl19-3dytk6+UGhyTXPwWCIcOT_sBT% z@@&5W1yOCVq&HO_C_aW?1*vf2WJ33mKyjih)|cRD5c%r1KE!TB&i=^@P|gEUtK!lY zu}t2vdiO12aHStR0bcH`{`vhNi@J0sT^ovnyt9B1{J9v!ER^8=IJZu+BC8+O@$4=I2b1W9BjpbZNdFmk8_bRgQaR6=GIahl+$ANlGd zEvn&jk-U+f8b%Ri>$#^PzwM*e^S12#^(;m-vs@1yl%7DHCID=$cuOGM2tCV1cE?$a zEUH5`l|kvLhZ2^6`;;b0`mupLV5cyc8Eh?ex=A!4CE8*+U0Tk}hAwY|XNTIvO68V- zc~)T1-&-NUI0L^8CS)9o)OA*WtYC*yWt_pck*9Ia|MI-c zKjtVr4ES^pGBBU8-gnA@)x$I^iR}e&>AoJ5eIBxiYZUx~U?vZWmb{FA$nctxFvHwF zC2daReikkPHi5}-#6+F|qEFKG$bpnt{Jmz_x=@OUX+t4>$P%TOQ9OH#mE!SR#UA2% zX(ihLfZw4+u?qA^t2s=HJWDbOmHLx0DvZ&@puI@6rN?0P-}W$&mIeWg($+7MH}mX60434T&ydkG-c`a$-wQPgl&13 zAA%j6Q)NXO$EF;==A_(zgeH!IVjSlFqXWgpN^%k*Hydejg)coDPl)`eJ9qz1LQ}9I zMu=1F-9y=rVeG;#C1zE^Fs*AB3Scghz=oHh>HluEX6nttx$hEhMx=@&e7 z6iE*nZ8hfPMzEh;?hcVk7Q`y}Fn1UaoVBvK-d0nqh9Nf##P8WD=kPdH;lOkjy7~H)2 zkJ`aIZ8Bg&P3^t<6~$s9RY(GElKjsXeHs-~JTY4`1T@@nMU++E%asZDM5`$!Tdb?h zWV_izp1uGE_kt5XM(O3f5~J86NJNy3EJuB@VR>D07sz-++2F1TlFi1SKBtDzPJlx} zRS6B!w_k1bW$ZIFz|oan#!X6!hsd?4xw;qkg^H4vfN_%c5C9bjzT+*v(jF&_iI5YK`@T0hkdz3yGkv5?UC-oO~*q zA^F3D>@#pte8h|*m{kMGl_Y~vTS_K8#WoJz&^w`d4yA0pw)QLV2>nvtVSgwD(tvQ- z&eCvW0OU3b=GAP&h9!4&Lw*d$$0;doOS0X`NG-1}2P!hWS<@k0zu+wKt4-$9*9!I2La3&(N(8QsPr z2VA>Xw?f(lDJP~m?O(1(<72%F)#b<_g%S&s7oRUP3uJHIeJW2=)ZmVb$LB;dhQCft zHPRz30>k$isy~g&(sQJT*tu-aETCPu z_`pZ3{eh=dRr-@lcm7VvZf{K<7g@Zx(K$Hsf*O6_f$kNP+-QpI9o1UU)Tq50niV$^ zvuQ2qNja&c&y)~MZfboxwC6?uKacGG;xe+J^rFfO+1srC4_D1^PX=jNsJ9$%Sqy(x z>2xWGUsF2$El&_qaW%fGU(-LeO2LvTrpH)(9E|lP^|Pf^jC8V7iI${(bNfw~PGfl% zQ@NYm`qG3Q%p%ZzQBxDuP3O~VTsZqvh@#k~xj`m?yURGFOWgK>g)IQ2ib{PtkiaY| zx3JwT)N**P?``P;Ywz#t1N3Dd*<8v=pc~mm`6;>h-e_;_eyT)?P_$2;5%)CYUO>>R z%X!?k{(~O)F=hm8TU)b(C0WERvL9J~?g^gQNTuUeW6Esfq>C%k&*J3n53pZ(4uyOe zcCDlhGptOCt8YB3jG#Rj0=`>qea3CwSX2`yUkC_zbkz0}Hi5}kW@%exxkb#?%ens4 z0m|Qjc6?tpsR;!YM7Fn@zRJj`tW3S<^fa|{%@Ze0*&4t58zZy1D-mt#LS&>Lp58&& zoXxh7>6N==yQhSt{LOS<^CKZbuXU*4x2Z`X`CzSv)qG@stJ@B>0Bu82YY4DXT3F61uMO^5AJrJTtm;_OIK|<94?dpazRd`c2j^BJ>J2r zHYmW<*}nd(OqZr?NXTPSZ~MxOc->G9~5pFvC1y*!1h;nC<(EN#kshZ-@3~=At32 zSKN2-1@T6i*}bpM!eOC5M^qM0k;P9ERftvtK`F}p8YW?yhT@bAidCrBd9&A4uE`^U z84eol!wlVix4g+;B03cwx>aEKIlDKM`p4I(s~%P@S(aM>lusA9tO$(c$VJ;ye})Uu@tz&(hU*F+UWU z`}fu5!KBVKw}m`*8oX*%u5k3+ZRYuvTpG7?NzFW}#2g4&?N>`rohDR+T(z%aQ32}J zNl*{!^5a}-E5z;@)!h|H#jNo0*_m&sni$@g>24r+#hIhd6V=}qD&5r5Yqk!y0* zCMe0I^2v*ukO7{OZ{3YkNu1oN!PmTAiryleFgp0MkeWYz48LBvh^OGf^s;I$eBvB> zLKhJbCwS4`t`2%d&n5O0T*+FC=Z|L&uAutkyDO=wS+@swsHI*n2c;6t+D?8=rZgJ; zE3lhiQpw63ik;*okjMEInhz#0>qMM2HJB9gPP1K8k;x4SOR~we@Hon%| zXJBt=+3Br~^UwGBHt6BLF>}M?#KZm?SE9KL`>UpH@vG9z%4Zi&;$v>iM;7yihn_{Z z=(JHRb7#A2wQA5Cx++uq-lV$5+GdOt{jLz);};v6SHkE5kyrEH>jNnW-CAZ;&M{tP zJki=%HpZj5nk`+)wP^Y{=`d@=2r?=%53~RN%Zf8H 
znSt>@K6*d!)LFdcGE$fnSTz3IE7Q0txdTK&|2W|iaW;AE?9Fon$7w}O?O1_p6UGJI zzy&or1Bj(Nfs%#8I29XPeHMtV%UFpN1*_wf-uF^mmspafn|L13qRNlZ9V{b5=VS#Q zGYcmZMvg25HYxx7a5j^T4z`7i(q(!l{erGw8M~8iK-7+MCE!15DxLn#D{MiV9590P zcw?pwOIA?-{<9-F?OTT0-MX&Nv9@m*6od78(NGpQH&e&jCB!qBKdCljZbHdA{Y0`b zAf%=&yYhWlzD{e#U}O>ThUlG6FISJQu0c_*_wbIDhZDthLXWy&!jBty#X$kN$|rZc zrOy<<{;iQ08TU};2m!)FtB7Q#g=kEaK)L0th}}`X!;kR+gBdOA+zXK`BVED)2&xUj z27IgVh2y`?C%bz}*sL!Sx*q$iw&!TTSTPa3UwE`5#&dRrX;SJ$kb0-6reDG#a?oTJ ziTnIvUO~fPEi4k4OC@?K7wphZ&fWI&IPD9d*N2dqw0rL;YO}>XPgq&^0N1ial&p_YKbN}BTD*n?GC~V*^D219!6wV(%c+&J{fQr z|3uFP82|_Hyp*tAa0#LruMGY7BRrZZW8zZg^QQUb@0Ey{FSV1wv# zZ~5Q1cjxAU>*p#|7s_buT9niZ-oJ|3s9qbxBRU+7WVCnFZyt$E?x?G`Ay(abCN^8%QWG=1 zOJm(Li<{3D*2Bxe451_wC^?mHrc)1+@U^dL=MpTjUQY2C=~)nGLF$TCwcPSe*XoB` z!MJZgW1GByGj1&w%BKrIIT<^aVwY72$IzoizQ@8%iuB-=!2ijM!o3dUEiT^K9ep z`nnD8f^%d#TewtZjp43WjUVd>edZXbumX>Sk8N-3@13uFw?~FsU-DaD| zk`0RvT3YPiX=YNKrIIjO+IP!wnYWS}@hSpYGDyUoNbfXNYvNK1$=PIK&4ez=`#TS8 z((JsOK_8~3xyiR(e^#ji5&D(@kX@+CLu)t;kX1!!N5x;dr z{!JzIMBW#L>$I@MBMD4}b3>Io853Sa?HSQ4J!Zp#0b2a5AH#c^S5T+l&%girPFEET zFMs{x^^`NIx{R1&BT90u?AqN?&*wa>w=k|#KYX0>t&XErKBG$vUsGk&e*cgdQCaS$ zR+%1SwHQWizpPYM@Md_AxiocJKZFyHFu4e46-g3S$Y}@~Lt>1DSJ<<}>i)nCM{Yu&;Ko;Gy;(AfU#tH8dl3}V%j z$RL@%$nz^3oJyHNx^L2lv=?`RtIbMR}I>i9ps&mO^&RlXZ5C2kSeP zfAgW_^0?sem8dFFDh|i9aL6`9C{ZdH>XyznuPRoMhWV1wOfc3hyZr>ID0ld!+AE$a zz6>$GTy8Ox$4e`MHq{KYsejXNTr9Qv#c#DmKQTxq&PRTZDdAyN)!|@lXJBHdXn&Go z&sBZGmRY!~rcH8`Uwr7nKMhmEPdtuz28@}$5wxy~VWOPVP0qpW>LZyhJ;Xh7!Ho0X zC1ApD@p1R)tFr9XjmD(xmTc;`i*bv;%?C1mcAHuoIsY!lhE?0xt|(kuD0+K7%stD~ zI&}NvDf72p-@Y9hYl$wrN-Yv!^8i;e&QOGt!Pe+kMvUZ0SL{q=HIJz0PJqma>IN~|=tE_}=amhcrSy!;1adE8H*kUp5BM4Vm zx#9s@D+Lm(Jl9OQ6k4V#mshQQ6^K&!TlIIg($omDAH!kO#0Mr)ak!KNe5jA%lCDyL~goiP_*dFqpq#>1X<&& zew;I{*xtO#6UU?VDI&Flb!jP@9!wAi(XV&P5{QW~$sFeG3~c}spmKmx-fautj%|Ih zo3wIgcq{$pGqFM7X$rbPg-JU#T8+EN=I6o)p&NF+0bt6hGPX&7>fn>|I4g3#C`Rkq z$?~vRQ|=36#S}A{uB)L3ia+9a%titVluCBHVeK7r@_^xrABZnmW>Wp;E4^a zLIb2ppO4GifeFmmQ*sgPkn2DclPN`O$xB}%;o=k%im(@^A z&~@E^IpA|+wG0*ig`8+W>mJ9?aQ#|5_K!^spMvN zUfm8$fLdcgad_UFe%&)GlqMr;x7=f@%`8W8Xu&rAZQp+D%v_oS>lU@W z{E5yd9;?#RJBx2}?&52nF*f62aoTHUsprHS9WPMn{dtVte*_)_i-&`X>4~(3R1nEW zVAQH7?Nm>&d(ZHgI$dAaY{|IQbcELRMZ)DNNjbECb$a5WFeVMPvBjt&?szCWEGa6l z$OXG`j3=0WH58hqD1x6d+BMJi+R(-x$8pVlsDCf}ar%jPj4Y{$khR9mDe7ld8hO;s zn5zOI3FRIWoeJKp{JG+RfEVp3U3oQ|-A?J1pz|4UHTK)kc8E!>%XFb_N)O9pvpcgR zpM?kkLW*h&4r`kq^g4N3l$DLIj+@K{atEG32XKA>An?f^z+hS9D*vOF*v<5^Bp#)BXc=0alnk23|1 zZrC328HuKx2co6eU>4T})O=7roW#SOYp@tx79{8TZl!#)86?3@&t;8sW!bm6@Nvlv z5)1jEkK;N5Ga9ppQho3q~}@$4OjsjFyt%3pB_w{SWerK`?$k zsiPS(AMK+qtD8ZU2h1ji(Z)0lU4}swufM}%eRX@+X{8B=$H|8)PLqb6q(q}2jnN&DUH~NWJFT96z-h zliu<@!h$gOOrKZR039UD2xROdd3oF5fIi{gaa^`Xnz$$zMNoDs7eVAUp&RWIb%47!x<~Vo8Z}TT;r7At|&sAOZCN!?6ERT&?T=&3m)!7 za~Q*Ch;3IhOks-V_pWR`jUArUU!@ysQIW;Vf@?gac(2&khIK z9!}LlnG2p?X>^*TnzuB}Ou%9m@7_@PJjAl1OqxG0YMdOfPiHi7HS57`Hrpo-%tdsE zJndf(O`e8CRPD57xJt+Bjg)%xbG0g?oB-}&c<-=VW?TDG3`G2 ztMEsEY`OOL48J@7VUw96=@Wv+vG1z49)pJmb+}@&1^zG0=-ufGGy1}b{^5RBpW(&# zo+%3_r;B+4j6@n|PWS(#-%HO=b9Xf0b4^CQhReJ(@FSg4B<605a^wEC`V5@PfVnj$ zHrr3hiDWq@{_Y8D-^`P7eX3_lh)^1z?#Cp^**_Cug}L6^Msh}dH<&iY5-?^M{B6pZ z6lLqBrL2^JUx#UL>vKe7`1+nIx2Ps6t3s>UE%H-xL7s^kAlhRD;~XWfL(=??^^%ph zO=+^KjJ-0L!T%uP&l0w;^q9a?Lo^;M91U`^xJe7?EtK$|;SLb$4=%8H#*?>KEJ|Zj za3`xWR`e*g=5Uz)!mQB3d#*0sDpVga*T`S&4vF7~8#tfhG7wF!ZWt++@So!ju(2~x zBq^Krv(-YpHz~a*V2!aWB{jokr=LUw(nJ3ty|v8vr?tret zNdtULVo869X!v)ur{7f!-2A&;;#dUtq8dxKJyYm> zl-R4gH0p=^0Qu07p3{BcqF6{=-Z}rE}u7^#0=K)i<9{?P8r6d4AU4T(zLt&H!|u96Me+pEGx7DF>b8r*c=>a=xKf1yJ1Q zUJoMRvPH9Wx^D9yN^9Aa%-;dN~Sx}T(Twj^>L!1V2< 
z`3;>plUwz(0O{Y6%Fg_?6p|^-JHmPDx@AEFN|QKWkUfr*0I&l)91>TeQfwPU1`ZIz zzrWZ*$|v?ttADI zwKH09a6FW%ytwD)wk0nD{QxS*ij=VxHR`xf>iOkgC72rOAqTlYxZX>k^@?P3oqtuV z#rD?U`3CZhzN5j%*uAptvtON6Az`t`$MNuk*h#8p$&Bj|Lh=8VLG+n3cKK$G_eoZa z7;PSdOB+16s6z$P{X)n|7}6q`8}Xe&fiS?E4jBT>A% zL-sP9@b~J?D?l1Fm|=ar7vPB>$NOLB0WNw_`2Uql3@^(i+TPIzh@sZAyqHg;Q7oaz z0&Zwa$F}RFt+k?`Bs7Iwuh*f8JosEB_)SzQa~sOX@C9(>8I+kGwWnQQGNiSCk+3bl zH`!4C84Y!&AoMj!brp0P&Cvk^I(!K_w#^TBHSzM8Z-8n(O(rWp7M@)rTS&a)SGf zPHv#3f`d!=lT$Jg*1KLmCu;luINLS=IuMSrz#sW7$QzNGBxQV1*E)wD>-)f`h@l}g z76D5dL^4>u_tnvjEk3|NFw)4!jWYPuyYZmG!7N|b2?<|sQ-7W<2GPZ_>VaS&2v=0x0dz&`Q{b{oB^0{RB8m z{j3nk#4iBxpRG+{H$xxJ;l|hXH)NakUZS9lc3Do%F`_WqdcU%dDMt`DntOc6^JWSC zJC6-t%T;*j^aG#^RSZeJ)QhGC>XiQVf+hdUTQq&5*sF<>c9;4 z_Lgr(&EDqm$_I#gce@Ho6=SpMOuYl64lmrEdV)%nC&{=?Ub!^-dR zxcR3u>8I*CJlomt>wk8yf`^s116;c~nmTo0VsM8%$WqK(J|h|Aau>rvW6CC9TP&zG znsJQke)Gr4>WOQy;#S8ii|XCq94>$xC&HoCW$eGF4m5onK^j<4Y9gHtZZ8_DbGZ1y z^&a;Gtnc4Cs5Ia&&q&=aU?;|TMK}Mc)&})l{8eCOcwm+;D@9W*mL<5l0@I)C=cPW^ zr*&yARQLcGunNNdx~izD1ElQIzyFUC1ICB%&Y{Ly9+0tCqt$PX04? ze;mg52b04H_<#aADUIwK_8wY%o+2qJr?n*w_HdqxbEdUlX`!rq^3tQUqet$zX(fyq z3!DbsU2?~$tN2UU+)Q$EWHWBmzeA#xpY!A2YhyvM*KCV>ZlS$3lmo8bEiOrMdGZXS zUoj@zTfaB(!d6W9qG9_5Ah`1e3&_IlvbDw$Tip65s)mC#wyn`M;P5IO^{}RmPny&y zEbbr$0UM8q7sM*;oOnD6!Om62a|*$oZU?NjohcF^UBz770x4$m4yA3zbGyMNBgv-- zE9;+29pl#(-#-h&m&pG8UzyD6qb`Hj*5!7+V1|PDS+^K%Y`ocGchpTal=>awOL{}3 z;7VDw@nfPd*MMRrH-CIjS9V2uIIp(^?Ee>gK=*NV3-lrUXnFle_4VanUS3n)eM3=b zKz9fRn1Oo|->N^Y9i0?=8W8c4z(1EAlYONNBjKpZUETTy`d}C3_^aANyKK5qzjOdCbmUF@6##AOU>Qc-oC!VnjIZTK4cGoh;vq zxAd@Ejd7qiH<|;M|Eeb~2#e7|?VV{&hhS8m)Vqs_Jyb{vX!`6qRWFX4uBpFgQVnrc z`4Y1JxDA;=VsTh;e%AcnG~m`54=zdNjW}oOlqqddBp^U(o(_Z8eK&eR+jV0_S;4Bh&Jnlm5E!Oduv?fX~Sf&_D!dPMi;slB^_-fVO zbA|-D7c|O5x~96!|Aai}rnb|!%6QMI{R>r5(cI8x_rtrP;cxcWdUq*?CFr;R&=eCK zO@UJJky%pCA>NLSo5${#T9<|9tX~N`sq_Qyv2>GmUn3evJ%aIeSmKc2&G74N5d5I# z(YPvOGm$&AZ6ET-R}$@dx&lPzBH=;CPbohjq$8`8X2vp8~Vxz8UkmB_jP z^+!sCfD6svh2(Rc$71gpwrVSpC~sQq*(>Lx-tc0+C3Tq73~9Q~ueHqsJ!^L|B@3;q zo)lSEJukX+lkdRSo`X0Q(xjwD?cK%i%v1<-jYx^#SP!ts6A*VXRK$MVx`%zowxS*F zFHtTZv(ZWXm0GtS(`?NM9#IQXo=y0+*zieGPXMaev6#spb#}83z0XeYF3TdaME5Pq z&D32=o;LC_%3%~qL_D6_hw;r&|Xsh!j|*N_&I%g4CRk4j0NlwCWs3!P&}$XHrl$iV28 zMCF0f3XwG|&;7+u$vO{x+xjXg$f9Jq4>DeP&lCY4lk$(9)NY39fXS$wpm!+2Xl2Eh zt?MvLb%RneL4VSMm>sR>T)tWeWSYuh(P zM^_7Gx+uzaic=Dm{h+9dQ+ep!l#whn^!cAUuR{;GIrbZAw~{^DqE|kBNnY6?%!`S+>#5Q7sWP`xyP*3wG(pjLH#29!REGMAU@v0T zIV^~tKJwLU*`SKiD~YgQXi2RS@pRgQ|g6L{I4LHZ>pCsVuIby9re)oWOrupy|fcN(HQs(fyjcx^z+@Y21ySD-` zrbep7wSI*2)9Ss}$Km_z5l1t70A-*{PVaoI)Eo`*DUuRjj(XW<{nLt>LQ|P|;*Uw&wL{O$o{%$7-jY4zCzpGfSebp7onM>&zdQU!woB2&{BIr_%1fsa!b4CNKSR zSUtE8?LIfb-eT;Qqri%xr<3}Ghd$$#MPB5%YWqFa$Hi$iZ|75_1Dg{jMP#vwkmwgh zGRCqan`%fP0=}I<-~)RSeC1%4x7Um9I;$&UOBHn^(SE~2JN9DMWOILHNA8-;t;Uf|tFznRoSW+!00GkX41w~*k_6>3r`hzyCHq^s1&@a2<8N+e zj(#rIJCRXiC!AS>hL=mrQF`L9)@^ZbslsQQMtdBKruJ(oES&jL+>_(HU1)&TbiHhH zV)LBtfg(RuA}&O>XU|v+jUl~ZPj^>AE+`8k;r0wTw|7c6YH2Dc^Gnq}J&I=|#H3lh@cH)kL%TtlZGW-%a+iy}MWi2)_{{W0#dh207a0>_Zwy zH_3E&eW=d?!-av18+hZxV^FN;P3q83xXBKBs<65C0MJz5?z>9w`21aQNm5Shaf>T; zFx>GY^-(Szn}VOpAqZq&;td`BHeeu-u`NWdJCJ(@3r(~d2#3FPk6Hu4s>FPNb^V)i zFuSMr+v*LNJfP8Dx`AWD;CT;DBzouxqxI&&`~Qd+*wwHzHXnvkP~NX_TvxRZizuaI zDU-9YWEQciHitWW#NKdDiGKzD!4!o!PhdY0Px4(?y8r-%tu{362*q z;&zzKyRuY7j+M_+r7KfuTNVYy z;TVm}MOTtp<o|O&@Jvq{R;pKbZwO)?RtPgSR8?tJ%wJJ)MCPld$DZ4zQf-(S^@WPc{&wYH;T*htysZr{=EItf3C9~ zid3Xb@OGW5?*@_8VwTU`LP1g}U$Z>?Mac9a&m^>YVmhOrRr;SiWXrv*x^My%#t8;O zgjt1c9cxuysH$d%Y!eZHeokl&N+~&hqvk}@n0&u@x?V?55+Wv-n=qg!3co&E(~i-? 
zWEqo&U9@jcDJ@UX?EpRMvXeTFB#fFuQ4U?hOUQRp<61;T%2PSIG_wlGj6?6}Cy6$c zYE@W^swTU!?Ob;;na29iCh9KXDxAh8PvrEjXn>^>^i87teba%f^p~JmA{(5%`nl`0 zj#66_yLaqJ1eb1lk3QevQy8}rK4-Qq+}JYUk>&}HCS7GwzFReRx?1oUmmr38plv-9 z+&$xDx|(hxWx?6(bU~9`iqv-m!?f@&hiQ>Mdmg6;wpDyw$ERL~CrT)7V>}2S;NKT6 zRfu}I01;c&D=9A_#5m}HQ!$@S(Y6RJ8rHK|oH`-fwf4X|Z%@772l*~=C}`k1oAt)( zO%j-cy9f#;EPc5`tG}{iACL=*Be}!>f_KUHGF}M%ciIKpR`KtjH`PCR}e$+0T z8`Zv>=|CP%JQA3Y3|CMqs%&by{Y9ekAUyYS`|>ptfr-vXl&$sc#5GD;cmppo_Csl? zgPu~G9+!^Qc1ZzDg;eB&8&qd@48@B^X4{Qc>jP~SK7qpRhw0~G4%_7-S;ycmF~?b9 zP%rQB1(sSi$#@RxiNo#0&&l2M7?H1NVNqa3MiLiyGgPq|*33;+vGsfyF%|WnIbm~z z9KT3cjH-9=8|cAZG-t16Sl+Sfk#H*(^;G5#Sc!l+9O~aD0nbi9%IDgAor+~$#;?x= zDDFG+I;E*QWQ56M@rI|)Fa_g988aJoFQU@7ek=V8OlSa~edwz^ve1)KW#bgLs!kk9 zM3vE9p2`)^9h!c0i=&t`AY>Rs0{vrZxn*uv_p|Xrg-}eMydF>3aYN@oz^T3~^D1sl({#TKoL2>z!Mbww1FbG*&hHn~{Fe zbmFsXCJ(IVE9QP>X(KWz=y8VkPv%QWkra*?C(iytwtyqwxO0%;qVH` z7j<7Olw|D(tpI7Ow|!63j3?oyL}Wg9q%Z5p(!Q^eAWVOdbb}A`$?8b^?_gz*%tJm8 zA5Lmjn!o53H0%$4z{tutouTvo^?wBnIir>t!?ikJnFY#~bL}0iY3-$CQgXCeN70(h zk{2v|+0q1K>O!2hf0*e8L5$E#sCFMZWg5`F=GphQ?*o>r`i7U{YzlPg|Jxh=$x!2R2XEtQlQFGXT6+x2u2G_ZLCDto-`71SzE?QJ2+H*@q6z@qcA1ILLg93_4}1v|jTY7v`dshG?ld#rS8ryY=zs zMUFEEQzwpcHJ?N)wSr>;ps-M5RUX?Gi%k^F9}ITwT&Y{*aLUv-Kf`$D0aYv~;Wx6d zvmaLYv=B0?nYCtEVr?mV?qNd*#tO-W=+6U3x4vXcn3l61;KO#idf=@0z2ySg%?6u8 zQ%%#<8G39)rQqi_KpI#4(Qe=A@r@SbSQ`FEN6=9WG&0Q*H{C)XXgPIoA2V%$>-!X;H_B zA~AXS@Z!xdm&c2uc@iW`9jN+!53j|yJM2`@I( zHbZc4m+$5Owv&I3TwDu#hh^{eIWS@azCd?OOSb>^Kqx)yd}v;z>RWc0sk$IWEj9f~ z++8wsy@<^zlm*}Y?8>dX8x@NIu)b27TWR*^->nN@^JvS2CZlEX`9U z)bQ`6CuOZoa%-X{zaO0^tZdFir=mVvcELn+N$oa$zKce&#w>bN@-47tK8N`+_QlUS zR*I3G!D<>?H#L@8%`;VmSAX~Y;Tc)4yvO1T-9s%!r>)D>Q>zlkd*NHme*glow8YqP z8ifDVBSi!aNaaTrwqT zQ;Z|DWA4QDe)mQRmsspnKm;){oG^(Qy=RnimwKC4Rc2Fm9vqR7#UNPdCE}hZOh(Fd z4gd51qfR(x_W?mAX{JHn@Y&-P5449qiu`fs%_9$FQ>>VEXL4%;0BH`_&(5DNBkO$X zC#dybku{wYuWodMxU|o7U$Ts z@ciiv^XEKu+tG0nV5uZ}fA<&9&dU9u9|D{R^W!R<++A`Z=>4;Wag#6sPD2PLt)A{e z1P7RlS5=9itB4i=FgSb~n(d4ad_uvej(*Z*Ua6>Q#zo8twc{>(ALtLv*U*&LWCyE{ zF21XW;~*7*<815-&X|575H!L%(j)#$@)rW|Y${6+vsTZh5!#81ipA=YkKj6ecLC;- z*X)>*q3BLWIDx{yyqji;obuU4ClS~(t^WqF5B~FQg|)Gyk+T;vbi?q z%>Eys7=WQcu3T0f=>m^dhQCK)oN#@6Rl>eZ17elI&x^^b#6^{2VK>A&LQ+77DZh&c zHCS`F;#z3?y{Jj(eaSvWYaiSzKY@Lm4DU-9jaGW&l_*Yw{75O^I5NdVKMnvtuPh}) zB5aP8v5hT?Y&YBuSS)a-H@XYD+aWO{qV$PlJ~8+l=Afo)O1~<23I_brhhU}R(f~jq zMgxTFako4$A)KX$T91M8JDWk-`fdNGfh&k-bwzw5CX*MC;X4|lQ!BGzyL%Z8nFL3v zC;{EoE_r=`u{tr9UffkPncB0JSd|1&xBl|Y><)4N{WTA1*znt6QsBGxYrL|Y7O-3p zy`P=^1UflG(WJY8lLHk}h5=vTYiRsG`S?G|yPVNjHe@w`WV#STw+7QQio3?D{+&&I zD=8tP$MA6j7ko@B!QzQ?OU)i089i|n1?wkC=Ze@9J=nRfdYtHEfQG}jl;2gRo*n&( ze24zq*$z~3fT@5SwmU>}y8PsCZ2#*=NTCE{Ki)!@mOmsqwFjP=L;5ZE+2b}rQUbe= z?DTYb*quh2&sL&#c7(Mw=Yg9~tB<&aLAUB3J``pDt%&%7W^%QoM%HNZ4zZmaHlGz*pC?oi*jsI2`8nBMl4okK2K`v5jD2LN0Y8$wGcv}iO^T(M1XwmZc%U!^=9FDpB z;ovxUs{e7k>D0dNeYfUyeiL@<7iQ!<{Zv9{IWYKZCgJ%(Lt|6kg7u)GsZ` zW;|a$t#vPHX-#q%StxbuvOf-H*+++tI?(|*zA0dlh(9%IPBV2U?ghFP886nrsAO6L z(OS-a$s>1RIvTa0_dZ=}y4r)P(M{Ros(f&HKO;Gu6ig$&N87&}(Nv?*%#$Dx^%=Kl zPyS`1ImRxT+to#Xs>cX4uOK)^@mQNZb*wOJ0OKsQ5*nGHds7mT(r828M+LBcISxis z;K6~+4l?(|3RpRVq~av83N<-%Gy<7hnV$+HES4niJ0{dDf?ob#OC=ka7%vCs&LuN` zw?ClPp1Yj37Mmfl&{V(0zHLJ&|;V)nFjurqpj(pGs((MALoSQ%k zpsPKn@~KMckAwQMzm+!de8}0SG1`^YY0ntY8mr-O+5rwlIi2EQVE2Ei-S2~#4bwSv z9og6^ow5LFhJupZDOCvN&9Wi)FC|VkX(SJWsgpZBL=5WW`xGU7xjYzH_gi2L{t^w7}N5Nu{5-U-8a=R+l=h(LwOAgk>Qk)7@O`pQYa2U3 z&juEZNHJ-$g&xWH(B)vPCyoXW`3~iAPz^Wa__G#$e&8Z-C(zkV^O9wqc^C@QWAJO- zCkt=*$XNJ3faOE#4`~?QHbE6!)1rT7FnUgX#`!&oG=FGMGwrVQW?iL*BwiG-MB0(5 zmx+@AQtt5(Cl7zT?0pA)PXN<~YR;h-$fNDFz;ndVV&{s~tV&&Db9-uE2f|@K&onpt zfsEn$doVrH`=2g*N7TfP9za 
zLtrnnvM4HYKA&(?#W?a&WOgOak| zpnK($m5%nY6>AeZNz^t3@hT(^&b=^|2Q!1QKprPe9pFnw9|$0LK3sMq;y2uC){DCB zw}|afb#18EMUkfm#3jcNnVabfG1mYEcbW~&fFS-7q?1z83=MB^5?v3wE+z%ZZyRjU zDYr>o7fhU{m`NQt5w_iUEO6`X^*nn`<>w1gcpS!R~f#r zDG#od-^47LxgA~5WEDtvXUI~)9bW*UA(!vB9z0JC0ujZv(@%3N_gNmG3uGQcZ`T&C zjw}}kxe1PWSLRb9th*urWDNfffkzbidtEzC5C>TqUikP6uh!s|7%`Na@cXc`JC~Av zY3Fpi5vM6lZ(l$1I0|&`#dugnD`AT9q3&?HCk=n5z%rrCEHgG0*qC9w)=xY<VwM zLkoAkGhytt-38n!)}sEP{PAeN*XYvVei#=Ye|dO1MNV-Ey|KlU6EP) zwcXh7lTMz)`?s?{(NEsxqJ8}tb_}O>WM;`r-PmPlegoV;0n$xqJGrumG=PI^3Gk&H zLjS?n_y;~Z-y~F^EB4^gwNb3c_W5Zq**mZ}|ANTz%A2=WV$>o_PF^8LUaNHL{RmYHiL@oCJZ@r>?1`8haG#Ihz zUQFJ;5XYX7$*JH8aDOUiPl|>mB{z`T8>d5HRg@XYC2NCS(i;v#j*_g2RO7iVV&8QC z!(L|vj6Gi9Uc8-52YwICwwoWnA#!2TFW^LPPWS1Rt0b$Gs>m-auI*R-THz%uw!`T* zL@eZ(<{gT>`nl@e3+H?Gx=ZpWS9l~Uk1AQtzSJmp3*%1b-d?vNGH1I&q@LyP$74jC zUr3QrK|78*EK1J3LI5c4rf9a%kuv_hSQ=OtBf3`y8Lq!KnYCAT$V=NR{O4H z^Ef3^#!09C7fqXsf-kLXFcN*U26Hjv1+gcm=FL{wli3~uv;4HLNlw^JrRX;{ee*x|8R-LHNK41tcu1LZ6?kaC~&g{L-U&?P_!Sbt?C_Xb^!* zVF^J=*BpN z+~!(%XC#bSMU*1?9-48HbM+o1Z)|k)QgqLm|Gf3Vb{0D2ft7-303pWI8VTdq6&#B> zi*v$;9UV}kh3)CSx`>1e-|9&No5wv%>(x|v9f+Pc_e#jqQc3xp_H&t`CmBvJZs?=) zgnG zGVF5&u}cZRUd)yK?a23o8MT|mR^xH9Pl<*7Cg!3CEY1?4(0*|fag7WHWBXKwXH$0eMk2nxO-7WAsWc@v}7xFA*+_v89 ziBG3mZG?PLgZt;tJng^AC#Aga3go8V^ivAxv6P&@e}b#XXIChEs1<`*N`ipZd}3Dn z>IZNU#?lmYLg&#(ekv1l?f`$H_S(yY;;)w?n9Qc+qXqlPqI6XuH=50 zWc$Eh{M15Op=v{GbRWoFz#|Pcc-~ASONZ&eyLk=ohRYCcs#yg0+@@;f0EDX}g{NuJ znYTLax9k+({2MybSzt(*#S?1A*Pg}wM~Sq0`3oE2{)ka1skNGQxWJq)i#_j@c{~_$ zOj#WrX)0@j<6CySX8oj$Gh*tV@m)iS1&(DXf4@Z)_=8eoM z<>J$&!5f*9US9@!Oc^a-^2vKF$1cA5zH2dlvNdKV`o!v@NS@Q}E~cw6ug>K_%W$z( zkDi!s`WGI~$HF?Db1K9$7BApdvb!m_^9gAkesjNnTUAW@3M(8C7Q|ePY`UWGXy2$u zb?9h&=@%^?1I%_5E4Ex_y{Y$d&xo%tKhlrRCv{H3JHigs%DIKN}wUb?{{yAHwFoE~YAlJJDL@KX*RBu@G- ziIb1OWN(c=l_psdhD61LHU7K>iW)kz&G2J&nI7-!K{b%M8ozk)|&F+!$A^N!m1B)G;`)XWsWO>v=z(-;?zZWV)}m{-gc$A-<9Y z{TL$MY%%Yv@ZlGdIo)>FtV`$XZ0?{t91HI53@yXTEF3ihqr zz;Daq?K{X}0DnDGKIqHt{Quhf@_#7Xu>BiJk)epnTG{I%QTC-$l5E*!r%=3-W%_0A3HI(j zqDQ_9Q$LvgBx{|Kj91o>)^gGR-#r>NugsKM7OE>Ww{FNl*F8}dk2q(Lm5`+|c={H7 z=!Zt(!0yguA!Y%zct=M~U)UkBG*Jx0B(5M@EfqB&sFGkywERmyJ_Mu7#w3@**rYVVfRl$0Hv1CF`m;!_*R_2}(;Jxaa+ z4cIOKM(Ezfi)^m?qMx0kG(8<>~Dh$f=tTVV~i1{1<574^@)kWe4 z*IYh;Sd1GZ_(odR<_e=!&%fD(Wz= zqTeI^w<8YFyC=q?KRGT5p_~}%XA3UY_8w0m7bg=I>snrA94RGEH=u+xJ`Jq^5Joo0)PwU66AQY5Rq#0{!G zsoU!Q7c?A(JwrUh0Xl5-Dt+7Edsy;#cjcuL>Y`hjF}6$}?^}gk8>(Bb;_>gb)QPzY zzo?sEgb{hB0NM_WwAjQL@Je%y|p_D_@=ijA%LrBKbq9)evu@mZK!EiBP% z`O@T+hSachzme-73I|0(%@lS4vHb~kb>gShPN#{gO-8bH`0-9tO9vY4%T^uV9(t2-QkGgv^26&oh{~@4c=Ot`?fych0t0OA=p{oE1|M`Ir=61|S)4 z=cnJ~R$(y=qv}kpOix3y14cqNuiDaTabZ0r(*++0alB0#USX^&2UdSe$Z`^dGkmn# zbj#h@mQ}JO%Nu3tbj!1^4^1^Vq%V{EWc7Q*ZjhEEGRuo1|0pX75oP0g;$)rU6>m1& z@8HM_*fs9NUR&VMjlC}_zhR2I0VcdHZMV%5t!#Nm{vyrfY_{a)5B0g5`!gYBAUIeA zgdRKW(6;L&q@`mnLw_25*>wlLq>?Q{>Qe^-bT4m~8eIPeAG0eb=E%~k51%Ooh#LSG z^W6O5o#Glhx?-~^ zR>wAs2r}+B+CK;J$vJoW{a%?5$%+BW^821$54w0njxRl!FKSxR&Abji-0`@1z#M!@kb0?Amnzy(U2so zu~jK?UiPrwSFeyrY$O$3x+MW1DBNyeFko6pY4!XDX^*1X0~)5U9iYP_9;}fHl*S3Q z5RD6|hs<(IPjJKJo}|z?0@uSvP#iEI|x%}e`%S?Je(UTyDrEb5K!weh-KHSy@)8-F> zJyukkf6AbN>(>EJEi*cW%wjipaiG>Hr|04Nacan0MhYT!YrOgMGuS;G-_39mHle{b z)gbi_pGrmv?wdOJW1@cR%Wvf>>w`4=6SsQZ{+!lB-u^KUTxWcPN%q`QDoiS{@{P6c{A=9Ki+XBo9}AXg|I(t;>EnBYEb|R4XoUy$DGnU`BdM`P47{ z!q`z1xD;FUcd~!xFd^V2W?(JP5-?!DTkE7qqRf9Gc_Pc6S7bn=&_vvigtL~8Dd+z~ z8>TLfZ$JCYp=bWH(iSF1q!KYAqVdlVP0YYbo@IW(UOF=G*1?E%>e4*v_8K5ZdPlww zM*5sL>Idu^95QCCK=R45E`6GzShET1MbNU}dPe&6LO63EFxymxWvl*Z{fp4~p2YE< z2}u%la+C-Vc)Tk0@#V}v!!bD@aO0j11nIKDaIy6+2rlX>!q3oVnLm0S#84<{;@sqw*QEgw7;ZnnpzKzyW)h`~cO2~g)B8}JjE1Z?Cqvcl0)QNZ{ 
zFR>gvySJ34HJGX^;2PSHkNe+%^UQN7a$C$@mLCr!*;nwe6kl48dlDhL*6vgonMi53 z9;2RF$)!lnXm{=8$mP`THZbi(0es6dL?hI49!Pq486fwvd7 zY*(0oy^Ii!BG16QWH@6tYArco8WA60Aa9zkBof3(o7}LpuO@x}hxS9RP@xi(=n!Jw zt{;puV}(#FFl^R|F3WoO{#ct+?9?<*pKuXgCYJl(?uZqz$jyMLFel7wZE+2`A#uh( zM8D!kjr#Kcm9QH7?9&T;8V@1_b7Ep4g(>U8Fk@x_`+-40HtA=1fK*EmW(}(oW_Dui zMPW-9K;||888Z$tE+<^M$*t!BH%;9H11YM8Z2p=tN#D62KDYR8{HuphDt0(v%g zO7I19dU42@mA3JiYW8U*0XN{EV%fr8@?bNb!lG~Y;g z-1tkb1R}nW8i#$@WAOA}i03k+FsgOzf)6u@QN!(8CdIVkDUxw5;{*82&|M%GXoAZD z2I-;ugYrs3Zq8f(Yt4qKzfK6_n-=F>6pkikeZ0moTVCg%TjGi42|*RzM4l0=`QuK= zh;SaSit#5(fZx0q+}-4Y^SoD{-F?(%Hhm%2Q*I~Jt`S*AGF;qJv;Yzl>l))YEK*u9t1tiJ(1DR1 znOmuz+Wti6eyX&?+s~d@hdyNQT~69V5&63M5JzB>(RXOz@clX|9sI?|4AW@9LE(K_ zO2vDL{KLQd*noyX30Zn#mxqlN}RW<(Nn8*MZ0HxxT7xxLTZmzt|g0Y?!t=5AXv&V#BrktOs3=v#+)mS7<{pJTcXT0Jx7mUr*#$wxFAfDzmD z#iUH+{`P~I4`Ni&w)mTyw8aF9)y+!j;6Q|KWx_mF4|F;st`@cwJpnc~?Avvi4iAJB z1oHOp3h53|Mzy{j(D5}9UWTpCz{0sih>Or)xm%Is}G<%7+sm!xua}O6ta&Mv@ z3E=`Ks{-yT8;GO^`v~H8Y$Wh79&NmmFIjD=G7D69%s}(7uSZOeV*YRnE=kH-Q42PM zc`LgBEzCqng>wy7l8Ae7_BMpTeT(K|C&Z7lV&9|iTTq*DNCc@eL&y%T)*RxTn0#`Dkdt021ccewiy0 z*HqlHau;w%)1=k~PF$-0{2CN7(BM!3?^_*4R63Y+U;|;xl+ZyHi!lAxp$nx|)kV%v zM+O%bN@G6O5Ko#UT>utaw9U;kcTeAUgh;rnEIu;z1r}=NNKDp7= zpLC}sv!>9<_S?Aqs2PKeWeJ%?hgzPo)$Or7iDCdHtG;PH5w~&7MQZHeYT!rbfPxa^ z#1=n%{943Y`ke@UBg@lo`;&6Q%@=vUAd-@KbcxNVZ1v3d(~|O@OZI&__uwNkwhZgM zq(rH%*{FM5w6?ct0HQ^c_QyWdB}LgyNkHM)C@}_0nbP5h2?u~dwte_W4Uhbfj)cM% z$t`ZMFlEqCUw3H5U*>qJR))aDRM7GO%pR89sJPrWm_PUtB=5ju4P7=O;?a+n!I^7- zz^Rfl-{~44{1-Z1`uL;8xlTx0V#Y$yfrY$RFh4YCGL#Jyb@6a_+MmLlel|jHizz_a^HtfnsQI zZGpOLAVY*z{iG3dv^n2YD&Al7Eb%zppEhA5=U|h-%*e@)4pd*WuvP)e_6IdYc^5tJ zy`}TK6E2wOUi5$<8JMYhS`TyvM$jP>R|Q3@-R()!efZ*o-&efMg`-HQt5@Bx= zP&YjN@bBV1-`u%7*TlV#LbTVY|36I=>kg}5~V*`X9vBqTy5KYlc z{-g1M8S^YuSGG(Se-w{#f#)(^y~P*GU^zqUfUyEvDGxP7(1S9tg+QQ;4HQo{qTmbLuOQ(HhPQmXIllzmt05cp;Oz^k#0BZANSLnon-0A}&9 z313keY<>gS8VDv1Z)g{Xtk~tuy>ITVX1cj2F9y0U$NHOO^n-90yekEWVf{+zwNw;S*Tx|JH@I&KPwgs zvg(ODpcfKBsY-T^qD9q;G zmm1Um2WnMjL#RA>3x62DaT#z@G4zi%N)4;IRYv3(SFPiP0B2|%6R=}x`r0A+p?arh z7@-B($~zzq*5dL0QvKbYi&cG@!X(NQ7SxZj3R5ssC^q?o?( zINaUPT@!QNzn#XTM{|w`v_5l$co3O|+^24{Re_f^Eb)?fn|Rw0o=1Qc*#A+5RmrUz zWh95ChnJlq*;b+&1^`3={K~hS@nY$FCV()I!vrdR0VNn|-^%)e-{T>ZGqd#FLd&S$ zdrt$VJ&^tDz%h-4$sS)1?h*UZ{v^hcppwz)M?6k>XO@K2MYUc%@caoAK@9I881?Ib zl(%s^(z=2pqXDXKKJQDcD#US@oVYUqy?50)Vb z7*Qjj0jG^MZ5LN33{Do_?8Ez9+o@K4&6qztk_AiUK%=M;4?I!kP^4i@lhkLXGQLq& z8ItAYq?I@Y(CDS2bC0%BHon4z=zX&9XLRCZA_9G`$jIUdnkS9Ub9OgrCrpG41rvdn z4!wL0+?yCyea1xwa01(0zgy%HdKwg0B;a|$Y{{aVF`BQo-pj85Qu(_Jfyl1W5R%i) zLQg}xo~P8y3mY{QJyP5ty86@gqw(p&r^3N9^wQG98zBn{^}qV_4(QvX zytCT5*(sIlbv4y0UIGJ|2S^1x?&ln%sa$f7M~5-$t{IA2ztiut;AM z#tvxPRGeEpiQOsBH|~gG3;v!Tqf7NVc;eTk_1~h2?CVCso5#JTuVG~W4Z+w*?h?ni zX6^u8_M!2mq}voXTOvy%kdt>0il<<8to}bpjtcC7rMP)nkaI?nN&sZ~m~HYqgi`w*^SU!rlmc za^Lo$a_6;5aM{d&XpWIqKTC-*W%h`_N2rvGSiPL&@2@Om0uZ015kLioYP-wiVgJW2nSugTr6 zSgjlgID%hNkWcgfl^|b2qiV2YA9x0t2uj^-PmNZ;W}3@(9{A_+>8s(5o0r~vP_JoL z3|*qn{5ouR+UR`r(SND5D5c^~ZJ;vv>jiS|58;n_zc9&54|HEOZyRA|qel-}_R2WBk)uG4|dj z9L6CEH7IZcCIz>a{2iFp4rtYgj@D{Kx7Io;M0xhms#w@S#jVRR%vdV41&eq+uZAg9 z8}auBUm6&GAI6J~Si-9b6i2!m<~=Drl$>M-)|>S607j;5g+HumcPlE5F|PzZf6O!z zINhbYP;rTlTJxI+s-ztovDzBgxLr}?yBa6P5tQhA6|Y1y!3d||N3wuN^7(k*rBzjC zb+Qe9pdnqhMC<+K9oWd!CYbRw9Riyb=M^E$;uH)~QnMa`(ufNo_#b+HyHD4VjbIM; zhw(X=u&k-t5!RI{9@y_Ix${mMLB%YXB&sv(#pLg*E`&P6IeK_5lU4RDL8BrXRecLq z?1|pK6W*R8U+e9{$*Fy4!U-c-q5|I!HuJ6=mUUqb$y*L^!i#iIVA@Qm+g{HnzVz}} z-|W4evp9%LC3>P-g^LBVm4M$IS#?4j&qaR;jjQ~E z1!jNlAZJ~h?>+u6SHQ@XUe-=f*i0udvI?rLl=vH#cJ5D11atJx5dL`Xy0OO-VwNxW z3d=SZyR32-IAVnDXke4_9jBhdHNINvJ+R>Wf`tj&e*@JqQ-kO0 
zEqc>s!wvj2=_Yx6h)y6za6knsUxez@9nx!AFNX|se-JUEe5%8+m7UOb!rf1bZHJrU zXH(XyqKJoVbE?;BjQG2`cFM_UwEDuvP>u?)s%cJG$%%~fJrfcCtsyL(En-B|PCN26 ze|Nlmeu^O*W`#jnn7#Y9??qh(t&&Z#5j?)>6a}4mu|9(c&G3J+VBgw`9vLQoE9v>B z+#&fjH05)!#enV2>gJwXYda@z%Jo+au|j>&tRr7VlkBT<rDFBtY}1K1Dx@S?WCCxZa}hd!)m#NZ=^CLVkkrh~q)Ba4QQvG)I#_J2kE lUtj#6UBHz1|2juj*zf~l=QW$!{LpOEJ8r1+;;7@*{{aBbyo~?= literal 0 HcmV?d00001 diff --git a/docs/_static/llama-stack.png b/docs/_static/llama-stack.png new file mode 100644 index 0000000000000000000000000000000000000000..e5a64711450f327d956e4bf39f624804528f8622 GIT binary patch literal 72643 zcmdqJ1zeQtx;73t>L4l%h^QzjAs|S14c!eAg2a$R4c(xF5()?c(xReBNSBo200I&Q zN(%@g-6i!sFR`|JyT5(T|D69hd#&%c5N4kDeV@3W`?=$~?stW%tIC}?cJ3G+9^MHB zd1*~NJc2CnPYr$qv=kp-F$90`T{Pv8crV({kK^HS-gK3@>T2(4X@f@LF>p)mePZCc zV(sYS%D^qnz{O?ib8%~P z@-T2qa`S;-oP6wjTmlAr_nTUw9QHd@arCx9qfHsOWO>=IfUcNTOwDZ^TpeAk8Mu+) zyMlu&3Jv~(X7F203;enY{&8M0;p8>pzXU!?Iys?HdMGm`8!#JLUT%JNFd*niNnTq; znSo0Rd`8>Yp}-$Gl)0TF^oW!-#?cS#Y`hPavO96ED^_kc7ATkf-u9k!b#z3#+Bp4vqq(Dl1Im0~3HEz5 z#b6vg{{A*gNA&*c_U~~53;(;`p$4trL<+U)*;u#&ErQfmfOmg-P`f)C&WX>!5 zSL{s>Zr*Qju{O1E^w@iTx9Qhi*^}deG%=2jpub->?bpi(H`#kiq0o>jAINck9H4Rk z-=Fm+r1+xG-?>p~1?K7B)bW_^hCh2Thia;0Ob4cJS*R#pL{K!?=S!qmwX90RT^9=jW~ z-|T_{s$yY^@d6isok5}hm=qA)-X#8hVxal&rn`IQ{+J~f#?j5eV%Jo0f_ZsZ+qj~% zoJ`H3WqW`l0otrx?a|;Hba`JaD2yxW;DqcoIhp=V@Vk5QuXq1%Iv|GvmdAB>HTyk6 z*PuD>A2^;X`+Mv0H}&N{P*Uq(RF`l6In(_UWA#VRP;dVktpfM#E9JkQJ>&`gL3g?T zxx#>fwZUmOw>GtL0G}n*6%N$p@3v%5sSY&gz=j-X2w4BY#{*^g!%m^xQD{dedr0Y_ zHS(F-Lp!$j8??BfFzz-ERtJOrzo{Wyygck&`!41`QWDMs?fSQ9$-e7w|C?Enggn0u zTS|7+sfyn?z z0vQ;{9=VuWqFf=X1MW4q`+wZ(aB_m!=0B@boV6aY<=-O^H{om1gQ;hjO-~{~J|4wqwBe<_j2iZBd z;6FPA2fOmCzvbKyQ~xdgmJ`Ss67?@TAms+~o!x*(+SC;QuK#1m&fgs{D3fx+p#F%k zcE9hr{)1BpLg)Wm#JmeWfk67N93k@_D90~H_Rk0CaB}~qF!H7t2b9Zy(^LIh&g?#d zc5r5|{L?_~U|aq?XZClv1PT@QPU~OYZgTxo%<9j#2ft$rzeQ?*7}(`?%-t|(FC@m) z+z#+y{~?t8_qa3K)C`3NDUi)B0>v$jMp;6A0J;gFW2R{3f4FRKV_^Zo>_}6zjTOX3 zVfHADKOT_WZ~x=Q{nPwEid^#^q?G#yjrVr|=>UHG^^kD^tmy#1{P#G=-|>LoHW}q$ zAqmk@K-LZbHUy&og%3Rc3}Qa^8uzAxviJv{%heQP1?d??XB~tV|9`L$zj&p8h8i5SVNhs*ce(GgL;D8y zpY7hirV=21IQW5R1$9S2_JXE;E`<9ZG>Vgt>yL2jU@#X)H;g%I|3Szsg81M+Akh98 z5`6eijLI$W`>1>3|1XTn4I&;uv;4vP{A>B_-@VJW{6}~GM#^yhjcg!@Z!sby`0LRN!Z|wJP5e5H0wQdJ0{cD`c&G)zSco2O2uNGNA zhwLBDnI_7^6a$e~TIP<;H287wZre@goQ`8++>@5qRxi;InmD-3gciUc z0V44a1o!Wt+TR{HxbdL&>%UruA*Zebf(=vJ@GS}}g3)0lp+!}bVzjDdNFX+`T;{OvG&EL@g^}jOp$$L-)`)82) zE>81*Df>P^eEwEo|0|NSd-b5CfA7Nn1JG+1dq7q;^~MR|ZW;a_62rliE(k zW8z-YRhPNcVA3?-VVm*8XSprQ+iADu^BbzPP&Lr~+BaU=B9~tsB6gMG~)WKIHhIF>|4tad(dNgV+fJyIjU_WWh@)i7z7KsCX+Ma!b0+(Fci#L zB8cQ_wn?qO!f0%-$FlUn6Le8GUK!Z;6zgXY2izq_kOXCt z&B=XNNfQ&Dc-kxmO@SonI3h~Dl|-JL9^8G6fHbRnH2Z^$1Xy<%?DNmBIy>4{+j8%X zyMgG`aj;WrCb*Qmr$Az-81M-@A5FRQ8o-7XB#^wRTa4MzV-Fv}E^4zXsRY=8^PGTy|LRvd9b*fJybtKVu$=X0$_v7JqqY`whH>dl3g(y>$@gA&`$ z8Harqcetx;a0PMed9}p~2{tm)F&37Sm@yYPA z@6vlYC4=>~Z;`!$_=nIpPw?eGjuh)L_L=6TPnf}U3 zk()<}-G`T3`M(x-7g_b1-o9Wdb^D~-vd2tccej}D+FW#%@tv=}i>>ZETdTc!wc8Wu zh9kJUnx;T6F5mIjW(AuoO>-YCp85HM5vi*V6{B%{W;nQ^&vZ!&|1#K@;zVA%9{#Vs z-$dd<8dUhYab8>j8`J!T1!lpb8&gG}Yy5nej(}Op32!ZZoX#m~D;%iysbtuay?d@3 z%--}{V`SMqdbyWlmTk#4x2e1{Y5ftol=j8Mg?@XglJv6Bz034y<=_pX@6evP)Y*gUJDi*vjIDk(zBnQYuV3MuSO`$HoaSb z%V);P$=6aq2eXcjCf(rb);RX;fUKF;&iWX2*>Q32CF{walI*(|ggJb#wTGA|AG zyF@jUZjP_tRg1|6It2GA@v^mJ%{LUASrVZI zP8qp?WA#F*lLj(U%rQXc@K2D;PD|4iK2fd9_8jmU6MP-IdMc=KK{QlAvnm;`Q|;c;=B4;SNi?(z>f(Jf^@j$lbwL9>0;6_^h2}9MRw zPI4?CI>Myg-Qm{n_N5{!Fh)(RyE{-ILlw(4T%wZ1_c=p}La*#^$9h-}Z;YVrG{bhgc4vEI=FpLJ4=}x#CzR1xx+Ae& zs?zk7u^}=H1^R@D(6^UWGTE=@znWfgeE%poTyAUdCb3Zta~*}GLP|27#PA($>HM$D 
z?Ge7s$yZEo^uC@Y8+Z?la*t@ZO4slk#7dVrvB#&=&Sh|&?Z|u?Z8Fek^IX7X| z*s@n2nVm!4GtccRX=mT*Fi(M#y(&sfo}1Eru36t@9qV8=o;C)Txo&=jAPx(c$)wAf z?<4&hHui-H&7_R)Bhah!JRcnY)4Bm!yziL?WhR!!KR$NMVPlhrj!t)O27g$&YE_CS z&HVl0Yok}0Z0K0coXDP3C@&FXd3q?z8X-?N z_)T+ErL*HL6Y!J1AN2ZxZ?gfRf(lVQSGYt5Jby~_OYNzT^?& zPpj?D?nyOpI_tEn@=)sQf3EWmagiOgK+Ki2o1ufga zWGhWpEF2q~`b;Zzt7~Voedm5W?~heA@vY*@z(DP1suw(RvuaiOo&?^e_V2oe&F{8M z%DuM*nnS-&aunl9ycMzm!a2SkHZ#8wq zjE+H03Kyg1Gh1Corj25FK{;xx?O>jT9`^nKoaexWd0egFBbJNKsPUSOnMs(7PX`^Ov3Dm4Q+3-_Gf?NmqRpAmAYmakWZ|a!d2{J0Hi+qf;^J36;=^}!6UM}nV za1D|8fF!+kYbB|A`{(40s?W)hF~tw8c~u|c*so1Egn17J9aY4#7{3kYsOhqN_h87u z5qW25ylVVuQeUf9vbgugbXhO3gEbB9nH1^GG@d`wZ%+Xazz6Gkdyn2e$-b1cs;njM za#2y6P>b#hBby7t?!_^xLQMu`4)(lupNm2G@s%oI(|PrLs`?8BnJVWiJ0<~JD8G#r zUWcro;A+YV;hFNsb{u-yq_eP_!1KId{Din&d^4$#q~qe0!V(sUt@0Lpv@{!p8-3mH zVso29MLv1;*gUH+IYG(y?iz(sb<^laUD(unIqK;imrJO*aIj4TO$6{cJ1K0P&6 z)b2i@I#ZU$V-r0Tslg7cS9nU)GNX*01z( zX;a;JKYJ%`Cj zQwUX>4N(?(>M2vvGy&pVE|EK-$k+h-VxN_%afb@qzVd=>V#LOe$C2l#)J5vZ7?qu- zQ60`ipF7f}azPCLLiL5K=sb=rVz(l|~QbCb>237+>ptflh|JeKI_%mV0Z*xO$u3dmz!7dof4)e^@ z#W8B@w~Yyk;PR>#!uo+-Q57LO894Tds8flf`V*nCnKPoKd=aSIgm<9lwW?CMx=31x zY0pwUg-Q8OpXCV@dKjFdIFNQUhdNBXnN;2wsyk|7#w+y{{E6R=sXtt|?tk z>oP>L&si@Xrq#$~TqWF`XQt0bj4M_!JtX8@eWznSPYU<@id-b;P$;RQgKd^a%azbA zv8)|Ox^+C1)Gc%9k0zZ9EaXXL4x39QO$vLPt?~HQB0Jp&_pg2)t9?vqzQa{9AtF_g z;ioTSQkCNdLYJi<(hAL&0&_5*y>;A?5lJrD$nD8RD0qGSg5>%;?sfq~D7hJHm4VS) z>Q6A9jEy=ZavxWw$!8XWi||}vBI*RT;>}61wd)j$5=n4!n&ekZxbt(;)PAh5`|Q6yN-?%9 z{gy6u&WR3*@bElKDBZxFuvVn~J@0BkUA@UqEZ(y4*Y0=0G&rhlRb})r-Mfc5*j!Hq z5Pav$@<|KPbvpV#8aTn%_3)y|EAhZJoZWHaVQr8$sl;>i-_P2N!pv08iWz$JyD6bh zc*%NkDM^r-S$bvhAeh4?er%jhR4bODxd1%nct@Dz0MM*Xdt}|@4=o$s`vW}PVn&A>s?VHMLnY-gRNh&* zTgGkk-_I|ET|^F-4@Gm=knywXJ;`AfrB6SXF%Webgd38uW6sFqw&ftGpn^<(`uai# z_LyE%Lbp-kO}XNLo$b%nJ|23f`8gK2RfmTbsMz1Uo=lt=>nnU~ADPS=F?PQQH`bOi zsrtZ_oAcRh{!qeW=89LhwJ-vvR!cwL&nUaLtueNP#Ctr#W(uU*y{xZWW6tM=+&#bX zw&JlTs)Fa)qen06@(Y(9Vr}DIWnBdc-xPuwN66_@tX&A#j@aG6Jj~~j8Jh^-5dtca z=jSgqsf273oZF5(W8uP4#xzVQ!-F|xZX-T6g{jI4QG$)E&423;6#_Zwy-@i3mFeEf zfFgwlNlb|BWEEAhs+{vgO&PrBIl417==CT@X>JJK!hC<1FteJxAmufgTV_0S9aX?c z*vVF>^Bzd3n^xVL%*>tnxi;eM@x3j!%xU(&l$`no5rTV)@os}sMI56X8^-yB_%}A^ zHZD0F=In93u=o|1Hij|%s;5wfL6&g7@hwJ_E~W@a7Qc;X^0Zp*wxaf^;YX6sE3CtJ zlT4NnZ>bJwF@cF&*}5 zTIE1Ws;#pVo+j1m*9t8{D>oO^2(&AdeHg`v%aPQ4O&3K=kUnc@ zU2{Akw$m(R8E4)!eS14=N{-44XW5=Q{o+UfaM3xYZ*HxCoTz7{DW<=3h=_vSHU9nm z6&EvoL{^BVNJfa_iwknE798CtnU3jTIa2jg-yCY;9rSdlRv%S#nYhG~-U9#!9Xz{_ zPgQhVE*x*i5a28AWDx4WF3;zLSWDZdKZ!o0L)eKN*DR5O9ZLe~u<2@^0fi_*Isp%l z$&x|&OBo}wh%-=5e*;2F^lw#-IIG`#nyi zzr6mMfGNtb*m|fAacn#!i!@!x@8^=vf~NG=@;yJG9=@vg)&})R0k9|*q>g_skz?cZ z*wb6&th(h=n}QFiO((Uo(wGc_tr=!wyFTG?-&v$t^juwLCIBij#OL_heSKlHbiRQy z-p7wW{jjGl5koAzQwJ<&SlpMgV<8~jHLr)3AXx5U7vr0{Ny!ps2Vi$9I((h&YNuK` zf;zD*smJ0vPi@40Rn?dV)5#!<;@6>u2eec{m4`e{$M5ElXyehJ-~zYO(%tXCW60>@!IgX|~4rxo%nlIz0SZFGZ7wyj0RO z;xOc4YwMcLVq{4}ti*Fo4^mGL zyA(=UP=e2OG({^=XRWo^cU+-Qo;BSdFeR4}g1L7O1K|^T?dq$ej?%ZH;ZKxkIxP(d zJ6#o6YWIgbg&*V^Y>S}LAX}6G$s%!wPR9Nf=@g*CpL{X}ref4DNBHZq?yn8F6w)l4 zA5dKYim7EoV*ujr8gc6Spu%7Q_&?b7u(MftR*E1lZyOXXh84D6r142)4?R^;? 
ziv=U3ub``X7}C$>ZAX{v^+Oj$21XEkouk+cQrj$Ay8Zs?@{iyX>YNB;23Y4^B*)t$ z`~8PN{fD^8-%F%VH+XU(&+b1g%^%qeC*l~VWG zU(Z8%I1mvcGvvL|Omp0K8DL_*|_!W5c;@-iH zDSe((#{`7B=9hCCq6DBF4+iQ3beNn^n)vkJsD?eQtlQIB(-M|=_@+3)S@JBQF1ezRD`u*5cY7eZ_kntZaA zLj|W-H%&L_X(dbat$WBC4?iW0zmpNyetf4Qo!&moPdjrlwB1j z7O%_T)tnrszC3)tHo<5UM;Da%Y(T4oj>Lt)H}2g7&Nn4DDCJ2VX+ym}TGYT#9p97D z*XuQ#3rcn%$+_T+L%1)t^7o!LM4M6vY!;-rv7D1gnoweOS$f3@ly7hsDB2aUCA|tvDHb5v zcQYmMl(>EJg00I~TM3j_Lx}pJqbG|Yj0Au=j>IB!fr}sL9uleyo^7rMhC4+xU~4(v z?MKIJ(Ki&ExUurqP~Q=g3QR^(WNA*>fOqLv{|!M&G3ySRB#>=S=f63*J`;hNBqq(` zNgSD-xm>1=+J#XSNhI>4Pn}dJ3;`#aOM1_GN9mKZKLyibXLMVe8?w9a+#z|iB4ZVz zR{&pwx_gedP>H<)x6-=6MiJp4IqD$(P zQ-v+)7!|$(z~pqzRIeJZo;Usi@eS?(-snt~f@v|DktZRto~5`aZirk1nSlM96a2*x z)xq}(Bzbk`U6!vWAUnSnWsQ~_x!s;X;3MW>BiA~wk_1_k>;@DI{aJv%>7~7Wei&zwi_-4F>|)69DA;6u8v5R`Kxo|1yu=u;Llfj9cwF`us8OVaV#%-g?XG#4=aoF{uZryOou~r zUH2hsyWI8J)G0uZ4Ay6f-9DCW(wXQ9>=_7-7W%#8dTM`8uqKK6tk^!yS2ixO)nS?g zP!oXjZ^U5Nq+RLQ$;fVyM>34wgCDl;F3fN~hgat^KU|+Z+L)w}0MS;)Dx}MUq4iox zY}yp;M%GY*z79W3vlK{jsF`+1P~~(MfUR7;LS8cffVvDZe4+wkc8A||v$nbrT(;i^gy@Lvdq`?!w zNLYb@&c!+9qAJm=?9l7VeamUg+M!2f=8XN8JAq)+At3F@iF4%vE-zmJjBXgqxFZ`C z$Dqye@&}#Ia;Kg>1fS3YI;OFwO?bBImO6t`5jaqvi>%c2IA~ACFsYS3OZ9Fh)DbCC z0r8;+KkSQ1mb#Ab)hpttQ;<8x7mFdu35#jRN9)iMDC`9vzK1NIUnnRCfPAHja!8HG zfLCLvuz$qSg^YM(1C`9S1nx$Gny?H%X{z)?AQ(<1B9mv(4mD*sxtw5?>U^WrXLV*K zN7u|)|G7UfTlT&7Wo~yaxVs+I0w7~lbl?hgq;J1GmLo31XLv_YHE*E#PZ15U-S?JlRs2A=>$t zL?+&e)_BV7mKUS_9$($46tEA~lCzs-8O&7X?-Uj^acmVyhNWyx4Kuk-d}<0+U&tWM zNPUy9KhI@0*gUL1TsQ>; zoe4ZCzuHuMG;nk?aO}j54&RERs?EfZ%P7IrwGP|Ptk{iLV8M1eS=!FSTo>oPmI0C6 z1+4v$DoNlywaw)6t`Yi2b8Cf^Y2MLfqrSj=v@I&>0}9)N;C@wTyR+fxq-O%rVfi<3 z-cnN$nyH=Gtg7c46X&`?7!sw7m&ej#(KGnGfR>d+t;Q51R2VQw_%=JM0ATogE^_p< zcjsvj0tRFWC{AFXQ{sP(t5!nQ!cXzx=Wd>I-^p@&zc;8%UT`6EGYBFd_X!uOz@)FA zATZee@{lB`k97AuW9+JJ!-a!L_tK-DCnT#g_qXWO*T54*wUM25)_cePQKyhTnHv~y zIY4hq?~)krL_fm@h!j9@dPJJ03}|uYaHC5vN4;he*0x}&xaShVVQCuIVYY{i*_R+7 zBYhTJ7%ioQV?D8Vq4 zu6AAykU5>Lws(Luf?OHf>KM~Mhf1svLM+0l!CrLx%I8}Q|C&|GVz4Huk*dZstdJD!`&Klr>?#LyW zPD_vhb$CeD2}3ZS29poZc-Gm*OwzG&I@mbs1Slan1k6JLST8rz-p9Ebt;ICF=}SReY)gJqO>i7_<sZE_7>aNWc8<4 zO&}xbM62s0fFbo60J@p&sZ$QWPa<#ReVI6Xpk)C*zDf>~7z2c@H#AZnjED9+c`XYE z9msHGK6NAPv#_?(!m^gY_&x#YQ`UQ6S7@W!j$EpauNaRp%a%cwba@+1lLvvFCMVT&$64mzyOtTssmL#-*;FbHwq}RY#bJpq4e53ORFO6cz!$43#j%Y%TY2l5mTs225>a)4Q?n<2NW><`Y84B7qh_c7K1A- zr`e5uqzVp)&`&*0awud}Nrc4@aWmHD+fb0qNwB;a^>cEY9q9Mp+4ArKM9U#tjW{$} zNDKCX{Ye%RSfK(<3MybKq{XLc%nx7^Q@{xp18|}n5KWGt_@ER)FWYt2D*2wu6TABm zx*{YjwmB!Y0cv|v)MYbMeP${^X!`Z6%M{?mrnc|q0ykFxu|TC=X5q;_3GK}nwHFn{ zW%>iQ*Pl_|c~aCa)_bA{odK0X>F8Y>t!ecGa+sks%zWz$=&4*#KLHhZtbPDQVejWM zM-!+Fs-GX{sx?fMG*d*o-ZE@3aP$lC_v2l6&hfH5VHs`W3My9)(QkM-EJMQz>yqF# zZ#b^!E_8dfakP45u?Fxd?)Bt)_va@2DvCB%f%im%zKcLA(+C0_ix@_g!rJYhErgAr zEaK(5GsqU&J~oMXfONzV#7HS``$1_<@{g&H&mu4D7vkE)cYG@snpykLuLJ%^2qEM? 
zbrsTrDFBd{E`K&`hbRGv8k%}w9^tH-dcnQxwXyHK4E@NBB`7jYTmA9=%}e)b+ocn2 z3?g|D#Qxm96%^4NIez9kC?|@%OC{QKn(gW!x?bZZvp6Usxkg#l_O_X(`Fc92M#CmN zf0XLCqF;9WwS;-p;%yAf&zJqG7%q51s`famwHBaKe?li^*k@#(| zkM%1pcSsR;1B$9&siCkDs;KegxfCyiNU6$ntX_`uN#9tVbp%t~vQ@z?LVQrAqqkVZ zcR-mH&3|j`1$^X~@sWuEfSL8XY~JBg6&yV^Ry`WrZxN%?r&JDj zn3H|$yo`HB_lVbm;I&L-$+b#%_a=t;uCD<*_FyNVI=G68`-1kM0Hg#e*dY{7mU))| zLSr`q*XH||+#!DWD7Ek4ce!Rs3o1~N^*LLQV=S{`y0rJat~9Huvw)kbzz|G$GN-6- z^Y&us+D8amjg|5L&IGdtY<_>=tqTBAbW^1fujLBh&J){S`{qz1Jr`$5VRnD)9Ix$2 zk9_Y$XZG0l>oCD(h*<#@U1!5-FQuP*?J#Wk5pdBnHPf^sYkUoX3`6++fLWgcS*pj) z=}tY95>V7t5tM98vmogLRj}B-J<|nJ2iE}4f}S!*M-VG#ZRol;Uhoj$1aaJI4_`0( zo0%EOJCK;V&Oh@tS622lACVa(=36wQIpdeivhzJ-_|42%T7Wx;nEjAckmF7GJSg#N z^KwO0`o3A>n2IxW(0A~8QS;N5sZc+zQOwwL*n2Gsb(TReww^paapGn;F$H0#bv5%i zT&LFeZ0_kC2OvuOM`+^_d-HDHON+2Ig8DI=x28LvsD2t-4t!WrR6e;pY;&Ibox(dU zxK1+vyMT8Zw=cLqAamc0&@Qr68N)s|EPIi}57AK++=^XzbO=)@C%qcWqLXa(6NI}? z$k;i~?{c_FP~9+{YOcDZv~zz82&aOCTO=KB`ay=`+DKI;DEpIY`XFAbgrZoaTjX@% zX`}tFBfSRl!E4l}^r=H2+yBs7qyYdV#3bzzxl)J7F1GQGP^E8+?d)K4 zV@ag?C`yr+jxf*P;c zs?4s&a1nIZmDSvPwa=48*wW^>VdIA>j0RzR#{DTC1Ko#9=xfMKx(bcinlExtA9>Qs z*ZYppe#*+`)`NjqR=toXYbBM%Gzyf^{Pg{ML@{aQ2 z`3uI_!guX`uWQ0Vh2VQaO)&AV zLJ0e;pvOd)k>s>WTfDZPoYt=n;UuNy`ynPkWYk=nFct`m>bfpD;>!U0_irP(=1cY= z-qGrJuM*E+eqmVVPznx$P)o?Qp~J&6*9p(z-#$aXPH`+<@s(Rvkbr3&Y*i(!PyuFH zyV7Ilu>O!IU2*C}%b4#Z3+oWWXoeAhK(6r_TZoi}=uXM7kw(8N6RLT>urO32(@|)L z2vQVYFMS)WV0MRv!&iS=(t1P1ge{gt%;}ScTt(F3IJJ<;d3RGoeIx#^>xWkHqk#I2 zF=#(tek7kMn=NjkGDIqoIXq8Hhz}f*zCmXE>}#ACc^>^qDqRYI-}{WwqNKEP4N8(- zLR!_>-hZQAzCm)YWix?ITmQ!DI{cm#tZ1RJYCL!XRDxXd45FwRVEq0bsv{BfsZwWH zqKUnR<-Q0A5q4T4u_rgT&dpXWMub1FA17bFn0~H+YQdR-Db>>~^bBe}xqlt~-Sliv z?UnNB?)BU1^}>m^G<2k)8p}$nql7xtly9!fh7_XbyUZgi9K%D$fk$+|nH=B3q{ zB$BNlMXg_+3vwE{Cntoz@kvXqViaynvf!k8{wrx{#b_@pM{0m+i+4eE?@jd#l}|B> znQ`^8W6eJY!A5l{kv?W<_;D{rhAxUK2w9cDav^g%zH+o#gu?`k(w$yId``d${nk%IT5gr)6l+=rg4 zoXQd_jlBZHU1}ffQ|#M`9>{!a??Bkux4>S+7uPj^rBEbLuY(DD>c-qHN(U!l0{yT- z8QNR1C?_lnl@`q2=Fq*n>d&z(X3=NV+)w??2!LHKC~)1eVc2&iv`A#Dr-zr z#)KQi6?}oveoM+oBEK*PX!Z=6O1mWU8r5ieJ+EFM*;J>D6aNGZ@Dvg zL%!IWf>o?n^zy>*6gKI*-kdBqmg)Yk)G`%3?32Eb)!2VHhbx4fqO|J-|FBYeL7XX zO;UxI1QF4Mj6HJ|NqE)rp-A(PVWNpi{^-Zn7TNd7#dH;nR<^i?LAmN?H;o1Z>C^agtul{_>dnk8m^ff8B1pvgq77u}_M zmFBaXh|cyvBrXx0i}%EZkLCzZJyOq=&nsvT%I8g17^f<2C*%*27D@5%ymxI!4B_x< zb9hV|dCa$qnldSdEIVuaVxa339o6ag?~Y{SI?BW5-i~rFWPXo!XP7i|8n(gFhss;L zDsbuGc(3IvbA(Rm46bi*T3NY@EyP@vhMCorg2M#V8-FgJxjX7tRm5g9cw!~g(Ocx^ zE7dB(i|fI;@;)q~`jL%+fP*+Lq`-K3pQ?QwNQe%h$RZas5y=m`(Z-m5?(vKa4pCbO z;I+y(x8`dww+zhgSl>^4V|iI51A94tQ9}U+vJ*11r}A0n5zJ|O{PX5HBG#imS4f_p z_sF;}2C{^ugwbWfs;H?5tBSPp(lWLCCwTj%GC5KO3)(8@+LJXMi>T$&O(GU-oPSi_Yjew9M@LRX5QDVky=#Q6?@yJu zFp2k28I+Xd`dzJ2uQF1ZESN3xwNWLRK!r2Tjq$0>kSq=a6IaxAB%x|F0=*%z<%Iy=}5#yg` zH%7rn=Vzxn^2k-jKTS>!A_GCK56?8qmJ7LGP0le;iD0+0y;|FUn6T+No&A&J4vg6V zrw(4j&@4m6jZa#csfhtLkzN|CnxB4yuh@8$EXeibr*?|mj@4G8Spm<`#d^)YIO(WLuOXe@e$+-lQS1qzC6+?t~RtUiS+|P zsf~E5>Q;>jL{TtlgBY)|cyf{WiB?{~(?|A8?K(^Ah3D4u0=8PtOr{9A8PFTb^vKgf zcwT3w#+!bH6o=tkc_W7JKU}8QTbE*LQ};o}vvFT@%=By4D~BCZ$o!xbY8JcWQdSLd zf3MPBMVI%GTuKL3=&VqA{uGGo-TAMGE`QRRIj;))G5Gpsf2Ucs&#L0mL@nFE`(Yg2L<(h0z<7FRnLKH^duZLW^2fTRz^63diowAr5W*?eAxQid0+}_wTY`4S0I^G1j)! 
[GIT binary patch data (base85-encoded) for the new binary file omitted]

diff --git a/docs/make.bat b/docs/make.bat
new file mode 100644
index 000000000..32bb24529
--- /dev/null
+++ b/docs/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=.
+set BUILDDIR=_build
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.https://www.sphinx-doc.org/
+	exit /b 1
+)
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/docs/requirements.txt b/docs/requirements.txt
new file mode 100644
index 000000000..f1f94c681
--- /dev/null
+++ b/docs/requirements.txt
@@ -0,0 +1,3 @@
+sphinx
+myst-parser
+linkify
diff --git a/docs/source/cli_reference.md b/docs/source/cli_reference.md
new file mode 100644
index 000000000..a5ea03673
--- /dev/null
+++ b/docs/source/cli_reference.md
@@ -0,0 +1,485 @@
+# Llama CLI Reference
+
+The `llama` CLI tool helps you set up and use the Llama Stack & agentic systems. It should be available on your path after installing the `llama-stack` package.
+
+## Subcommands
+1. `download`: Downloads models from Meta or Hugging Face.
+2. `model`: Lists available models and their properties.
+3. `stack`: Allows you to build and run a Llama Stack server. You can read more about this in Step 3 below.
+
+## Sample Usage
+
+```
+llama --help
+```
+
+usage: llama [-h] {download,model,stack} ...
+
+Welcome to the Llama CLI
+
+options:
+  -h, --help            show this help message and exit
+
+subcommands:
+  {download,model,stack}
+
+ +## Step 1. Get the models + +You first need to have models downloaded locally. + +To download any model you need the **Model Descriptor**. +This can be obtained by running the command +``` +llama model list +``` + +You should see a table like this: + +
++----------------------------------+------------------------------------------+----------------+
+| Model Descriptor                 | Hugging Face Repo                        | Context Length |
++----------------------------------+------------------------------------------+----------------+
+| Llama3.1-8B                      | meta-llama/Llama-3.1-8B                  | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama3.1-70B                     | meta-llama/Llama-3.1-70B                 | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama3.1-405B:bf16-mp8           | meta-llama/Llama-3.1-405B                | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama3.1-405B                    | meta-llama/Llama-3.1-405B-FP8            | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama3.1-405B:bf16-mp16          | meta-llama/Llama-3.1-405B                | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama3.1-8B-Instruct             | meta-llama/Llama-3.1-8B-Instruct         | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama3.1-70B-Instruct            | meta-llama/Llama-3.1-70B-Instruct        | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama3.1-405B-Instruct:bf16-mp8  | meta-llama/Llama-3.1-405B-Instruct       | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama3.1-405B-Instruct           | meta-llama/Llama-3.1-405B-Instruct-FP8   | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama3.1-405B-Instruct:bf16-mp16 | meta-llama/Llama-3.1-405B-Instruct       | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama3.2-1B                      | meta-llama/Llama-3.2-1B                  | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama3.2-3B                      | meta-llama/Llama-3.2-3B                  | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama3.2-11B-Vision              | meta-llama/Llama-3.2-11B-Vision          | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama3.2-90B-Vision              | meta-llama/Llama-3.2-90B-Vision          | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama3.2-1B-Instruct             | meta-llama/Llama-3.2-1B-Instruct         | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama3.2-3B-Instruct             | meta-llama/Llama-3.2-3B-Instruct         | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama3.2-11B-Vision-Instruct     | meta-llama/Llama-3.2-11B-Vision-Instruct | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama3.2-90B-Vision-Instruct     | meta-llama/Llama-3.2-90B-Vision-Instruct | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama-Guard-3-11B-Vision         | meta-llama/Llama-Guard-3-11B-Vision      | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama-Guard-3-1B:int4-mp1        | meta-llama/Llama-Guard-3-1B-INT4         | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama-Guard-3-1B                 | meta-llama/Llama-Guard-3-1B              | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama-Guard-3-8B                 | meta-llama/Llama-Guard-3-8B              | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama-Guard-3-8B:int8-mp1        | meta-llama/Llama-Guard-3-8B-INT8         | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Prompt-Guard-86M                 | meta-llama/Prompt-Guard-86M              | 128K           |
++----------------------------------+------------------------------------------+----------------+
+| Llama-Guard-2-8B                 | meta-llama/Llama-Guard-2-8B              | 4K             |
++----------------------------------+------------------------------------------+----------------+
+
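+
+Since this table is long, it can help to filter it down to the model family you are interested in. This is just an illustrative use of standard shell tools (the `grep` filter is not a `llama` CLI feature):
+
+```bash
+# show only the Llama3.2 rows from the table printed by `llama model list`
+llama model list | grep "Llama3.2"
+```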
+
+To download models, you can use the llama download command.
+
+### Downloading from [Meta](https://llama.meta.com/llama-downloads/)
+
+Here is an example download command to get the 3B-Instruct/11B-Vision-Instruct model. You will need the META_URL, which can be obtained from [here](https://llama.meta.com/docs/getting_the_models/meta/).
+
+Download the required checkpoints using the following commands:
+```bash
+# download the 3B-Instruct model; this can be run on a single GPU
+llama download --source meta --model-id Llama3.2-3B-Instruct --meta-url META_URL
+
+# you can also get the 11B-Vision-Instruct model; this requires more GPU memory
+llama download --source meta --model-id Llama3.2-11B-Vision-Instruct --meta-url META_URL
+
+# llama-agents have safety enabled by default. For this, you will need
+# safety models -- Llama-Guard and Prompt-Guard
+llama download --source meta --model-id Prompt-Guard-86M --meta-url META_URL
+llama download --source meta --model-id Llama-Guard-3-1B --meta-url META_URL
+```
+
+### Downloading from [Hugging Face](https://huggingface.co/meta-llama)
+
+Essentially, the same commands above work; just replace `--source meta` with `--source huggingface`.
+
+```bash
+llama download --source huggingface --model-id Llama3.1-8B-Instruct --hf-token <HF_TOKEN>
+
+llama download --source huggingface --model-id Llama3.1-70B-Instruct --hf-token <HF_TOKEN>
+
+llama download --source huggingface --model-id Llama-Guard-3-1B --ignore-patterns *original*
+llama download --source huggingface --model-id Prompt-Guard-86M --ignore-patterns *original*
+```
+
+**Important:** Set your environment variable `HF_TOKEN` or pass in `--hf-token` to the command to validate your access. You can find your token at [https://huggingface.co/settings/tokens](https://huggingface.co/settings/tokens).
+
+> **Tip:** The default for `llama download` is to run with `--ignore-patterns *.safetensors` since we use the `.pth` files in the `original` folder. For Llama Guard and Prompt Guard, however, we need safetensors. Hence, please run with `--ignore-patterns *original*` so that safetensors are downloaded and `.pth` files are ignored.
+
+### Downloading via Ollama
+
+If you're already using Ollama, we also have a supported Llama Stack distribution `local-ollama`, and you can continue to use Ollama for managing model downloads.
+
+```
+ollama pull llama3.1:8b-instruct-fp16
+ollama pull llama3.1:70b-instruct-fp16
+```
+
+> [!NOTE]
+> Only the above two models are currently supported by Ollama.
+
+
+## Step 2: Understand the models
+The `llama model` command helps you explore the model’s interface.
+
+### 2.1 Subcommands
+1. `download`: Download models from different sources (Meta, Hugging Face).
+2. `list`: Lists all the models available for download, along with the hardware requirements to deploy them.
+3. `prompt-format`: Shows llama model message formats.
+4. `describe`: Describes all the properties of the model.
+
+### 2.2 Sample Usage
+
+`llama model <subcommand>`
+
+```
+llama model --help
+```
+
+usage: llama model [-h] {download,list,prompt-format,describe} ...
+
+Work with llama models
+
+options:
+  -h, --help            show this help message and exit
+
+model_subcommands:
+  {download,list,prompt-format,describe}
+
+ +You can use the describe command to know more about a model: +``` +llama model describe -m Llama3.2-3B-Instruct +``` +### 2.3 Describe + +
++-----------------------------+----------------------------------+
+| Model                       | Llama3.2-3B-Instruct             |
++-----------------------------+----------------------------------+
+| Hugging Face ID             | meta-llama/Llama-3.2-3B-Instruct |
++-----------------------------+----------------------------------+
+| Description                 | Llama 3.2 3b instruct model      |
++-----------------------------+----------------------------------+
+| Context Length              | 128K tokens                      |
++-----------------------------+----------------------------------+
+| Weights format              | bf16                             |
++-----------------------------+----------------------------------+
+| Model params.json           | {                                |
+|                             |     "dim": 3072,                 |
+|                             |     "n_layers": 28,              |
+|                             |     "n_heads": 24,               |
+|                             |     "n_kv_heads": 8,             |
+|                             |     "vocab_size": 128256,        |
+|                             |     "ffn_dim_multiplier": 1.0,   |
+|                             |     "multiple_of": 256,          |
+|                             |     "norm_eps": 1e-05,           |
+|                             |     "rope_theta": 500000.0,      |
+|                             |     "use_scaled_rope": true      |
+|                             | }                                |
++-----------------------------+----------------------------------+
+| Recommended sampling params | {                                |
+|                             |     "strategy": "top_p",         |
+|                             |     "temperature": 1.0,          |
+|                             |     "top_p": 0.9,                |
+|                             |     "top_k": 0                   |
+|                             | }                                |
++-----------------------------+----------------------------------+
+
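+
+If you only need a single property from this output, you can filter the printed table with ordinary shell tools; the `grep` pipeline below is an illustrative sketch, not a dedicated CLI flag:
+
+```bash
+# pull the context-length row out of the describe table
+llama model describe -m Llama3.2-3B-Instruct | grep -i "context length"
+```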
+
+### 2.4 Prompt Format
+You can even run `llama model prompt-format` to see all of the templates and their tokens:
+
+```
+llama model prompt-format -m Llama3.2-3B-Instruct
+```
+![alt text](https://github.com/meta-llama/llama-stack/docs/resources/prompt-format.png)
+
+
+
+You will be shown a Markdown formatted description of the model interface and how prompts / messages are formatted for various scenarios.
+
+**NOTE**: Outputs in terminal are color-printed to show special tokens.
+
+
+## Step 3: Building and Configuring Llama Stack Distributions
+
+- Please see our [Getting Started](getting_started.md) guide for more details on how to build and start a Llama Stack distribution.
+
+### Step 3.1 Build
+In the following steps, imagine we'll be working with a `Llama3.1-8B-Instruct` model. We will name our build `8b-instruct` to help us remember the config. We will start building our distribution (in the form of a Conda environment, or Docker image). In this step, we will specify:
+- `name`: the name for our distribution (e.g. `8b-instruct`)
+- `image_type`: our build image type (`conda | docker`)
+- `distribution_spec`: our distribution specs for specifying API providers
+  - `description`: a short description of the configurations for the distribution
+  - `providers`: specifies the underlying implementation for serving each API endpoint
+  - `image_type`: `conda` | `docker` to specify whether to build the distribution in the form of a Docker image or a Conda environment.
+
+
+At the end of the build command, we will generate a `<name>-build.yaml` file storing the build configurations.
+
+After this step is complete, a file named `<name>-build.yaml` will be generated and saved at the output file path specified at the end of the command.
+
+#### Building from scratch
+- For a new user, we could start off with running `llama stack build`, which will launch an interactive wizard where you will be prompted to enter build configurations.
+```
+llama stack build
+```
+
+Running the command above will allow you to fill in the configuration to build your Llama Stack distribution; you will see the following output.
+
+```
+> Enter an unique name for identifying your Llama Stack build distribution (e.g. my-local-stack): my-local-llama-stack
+> Enter the image type you want your distribution to be built with (docker or conda): conda
+
+ Llama Stack is composed of several APIs working together. Let's configure the providers (implementations) you want to use for these APIs.
+> Enter the API provider for the inference API: (default=meta-reference): meta-reference
+> Enter the API provider for the safety API: (default=meta-reference): meta-reference
+> Enter the API provider for the agents API: (default=meta-reference): meta-reference
+> Enter the API provider for the memory API: (default=meta-reference): meta-reference
+> Enter the API provider for the telemetry API: (default=meta-reference): meta-reference
+
+ > (Optional) Enter a short description for your Llama Stack distribution:
+
+Build spec configuration saved at ~/.conda/envs/llamastack-my-local-llama-stack/my-local-llama-stack-build.yaml
+```
+
+#### Building from templates
+- To build from alternative API providers, we provide distribution templates for users to get started building a distribution backed by different providers.
+
+The following command will allow you to see the available templates and their corresponding providers.
+```
+llama stack build --list-templates
+```
+
+![alt text](https://github.com/meta-llama/llama-stack/docs/resources/list-templates.png)
+
+You may then pick a template to build your distribution with providers fitted to your liking.
+
+```
+llama stack build --template local-tgi --name my-tgi-stack
+```
+
+```
+$ llama stack build --template local-tgi --name my-tgi-stack
+...
+...
+Build spec configuration saved at ~/.conda/envs/llamastack-my-tgi-stack/my-tgi-stack-build.yaml
+You may now run `llama stack configure my-tgi-stack` or `llama stack configure ~/.conda/envs/llamastack-my-tgi-stack/my-tgi-stack-build.yaml`
+```
+
+#### Building from config file
+- In addition to templates, you may customize the build to your liking by editing a config file and building from it with the following command.
+
+- The config file will look like the ones in `llama_stack/distributions/templates/`.
+
+```
+$ cat llama_stack/distribution/templates/local-ollama-build.yaml

+name: local-ollama
+distribution_spec:
+  description: Like local, but use ollama for running LLM inference
+  providers:
+    inference: remote::ollama
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
+```
+
+```
+llama stack build --config llama_stack/distribution/templates/local-ollama-build.yaml
+```
+
+#### How to build distribution with Docker image
+
+To build a docker image, you may start off from a template and use the `--image-type docker` flag to specify `docker` as the build image type.
+
+```
+llama stack build --template local --image-type docker --name docker-0
+```
+
+Alternatively, you may use a config file and set `image_type` to `docker` in our `<name>-build.yaml` file, and run `llama stack build --config <name>-build.yaml`. The `<name>-build.yaml` will have contents like:
+
+```
+name: local-docker-example
+distribution_spec:
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
+  docker_image: null
+  providers:
+    inference: meta-reference
+    memory: meta-reference-faiss
+    safety: meta-reference
+    agentic_system: meta-reference
+    telemetry: console
+image_type: docker
+```
+
+The following command allows you to build a Docker image with the name `<name>`.
+```
+llama stack build --config <name>-build.yaml
+
+Dockerfile created successfully in /tmp/tmp.I0ifS2c46A/Dockerfile
+FROM python:3.10-slim
+WORKDIR /app
+...
+...
+You can run it with: podman run -p 8000:8000 llamastack-docker-local
+Build spec configuration saved at ~/.llama/distributions/docker/docker-local-build.yaml
+```
+
+
+### Step 3.2 Configure
+After our distribution is built (either in the form of a Docker image or a Conda environment), we will run the following command to configure it:
+```
+llama stack configure [ <name> | <docker-image-name> | <path/to/name-build.yaml> ]
+```
+- For `conda` environments: `<path/to/name-build.yaml>` would be the generated build spec saved from Step 1.
+- For `docker` images downloaded from Dockerhub, you could also use `<docker-image-name>` as the argument.
+  - Run `docker images` to check the list of available images on your machine.
+
+```
+$ llama stack configure ~/.llama/distributions/conda/8b-instruct-build.yaml
+
+Configuring API: inference (meta-reference)
+Enter value for model (existing: Llama3.1-8B-Instruct) (required):
+Enter value for quantization (optional):
+Enter value for torch_seed (optional):
+Enter value for max_seq_len (existing: 4096) (required):
+Enter value for max_batch_size (existing: 1) (required):
+
+Configuring API: memory (meta-reference-faiss)
+
+Configuring API: safety (meta-reference)
+Do you want to configure llama_guard_shield? (y/n): y
+Entering sub-configuration for llama_guard_shield:
+Enter value for model (default: Llama-Guard-3-1B) (required):
+Enter value for excluded_categories (default: []) (required):
+Enter value for disable_input_check (default: False) (required):
+Enter value for disable_output_check (default: False) (required):
+Do you want to configure prompt_guard_shield? (y/n): y
+Entering sub-configuration for prompt_guard_shield:
+Enter value for model (default: Prompt-Guard-86M) (required):
+
+Configuring API: agentic_system (meta-reference)
+Enter value for brave_search_api_key (optional):
+Enter value for bing_search_api_key (optional):
+Enter value for wolfram_api_key (optional):
+
+Configuring API: telemetry (console)
+
+YAML configuration has been written to ~/.llama/builds/conda/8b-instruct-run.yaml
+```
+
+After this step is successful, you should be able to find a run configuration spec in `~/.llama/builds/conda/8b-instruct-run.yaml` with the following contents. You may edit this file to change the settings.
+
+As you can see, we did basic configuration above and configured:
+- inference to run on model `Llama3.1-8B-Instruct` (obtained from `llama model list`)
+- Llama Guard safety shield with model `Llama-Guard-3-1B`
+- Prompt Guard safety shield with model `Prompt-Guard-86M`
+
+For how these configurations are stored as yaml, check out the file printed at the end of the configuration.
+
+Note that all configurations as well as models are stored in `~/.llama`.
+
+
+### Step 3.3 Run
+Now, let's start the Llama Stack Distribution Server. You will need the YAML configuration file which was written out at the end of the `llama stack configure` step.
+
+```
+llama stack run ~/.llama/builds/conda/8b-instruct-run.yaml
+```
+
+You should see the Llama Stack server start and print the APIs that it supports:
+
+```
+$ llama stack run ~/.llama/builds/local/conda/8b-instruct.yaml
+
+> initializing model parallel with size 1
+> initializing ddp with size 1
+> initializing pipeline with size 1
+Loaded in 19.28 seconds
+NCCL version 2.20.5+cuda12.4
+Finished model load YES READY
+Serving POST /inference/batch_chat_completion
+Serving POST /inference/batch_completion
+Serving POST /inference/chat_completion
+Serving POST /inference/completion
+Serving POST /safety/run_shield
+Serving POST /agentic_system/memory_bank/attach
+Serving POST /agentic_system/create
+Serving POST /agentic_system/session/create
+Serving POST /agentic_system/turn/create
+Serving POST /agentic_system/delete
+Serving POST /agentic_system/session/delete
+Serving POST /agentic_system/memory_bank/detach
+Serving POST /agentic_system/session/get
+Serving POST /agentic_system/step/get
+Serving POST /agentic_system/turn/get
+Listening on :::5000
+INFO: Started server process [453333]
+INFO: Waiting for application startup.
+INFO: Application startup complete.
+INFO: Uvicorn running on http://[::]:5000 (Press CTRL+C to quit)
+```

+> [!NOTE]
+> Configuration is in `~/.llama/builds/local/conda/8b-instruct-run.yaml`. Feel free to increase `max_seq_len`.
+
+> [!IMPORTANT]
+> The "local" distribution inference server currently only supports CUDA. It will not work on Apple Silicon machines.
+
+> [!TIP]
+> You might need to use the flag `--disable-ipv6` to disable IPv6 support.
+
+This server is running a Llama model locally.
+
+### Step 3.4 Test with Client
+Once the server is set up, we can test it with a client to see the example outputs.
+``` +cd /path/to/llama-stack +conda activate # any environment containing the llama-stack pip package will work + +python -m llama_stack.apis.inference.client localhost 5000 +``` + +This will run the chat completion client and query the distribution’s /inference/chat_completion API. + +Here is an example output: +``` +User>hello world, write me a 2 sentence poem about the moon +Assistant> Here's a 2-sentence poem about the moon: + +The moon glows softly in the midnight sky, +A beacon of wonder, as it passes by. +``` + +Similarly you can test safety (if you configured llama-guard and/or prompt-guard shields) by: + +``` +python -m llama_stack.apis.safety.client localhost 5000 +``` + +You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo. diff --git a/docs/source/conf.py b/docs/source/conf.py new file mode 100644 index 000000000..8f1d4b6ef --- /dev/null +++ b/docs/source/conf.py @@ -0,0 +1,53 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +project = "llama-stack" +copyright = "2024, Meta" +author = "Meta" + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = ["myst_parser"] + +templates_path = ["_templates"] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] + +myst_enable_extensions = [ + "amsmath", + "attrs_inline", + "colon_fence", + "deflist", + "dollarmath", + "fieldlist", + "html_admonition", + "html_image", + # "linkify", + "replacements", + "smartquotes", + "strikethrough", + "substitution", + "tasklist", +] + +# -- Options for HTML output ------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = "alabaster" +html_theme_options = { + "canonical_url": "https://github.com/meta-llama/llama-stack", +} + +html_static_path = ["../_static"] +html_logo = "../_static/llama-stack-logo.png" diff --git a/docs/source/getting_started.md b/docs/source/getting_started.md new file mode 100644 index 000000000..61f8f46c6 --- /dev/null +++ b/docs/source/getting_started.md @@ -0,0 +1,430 @@ +# Getting Started + +This guide will walk you though the steps to get started on end-to-end flow for LlamaStack. This guide mainly focuses on getting started with building a LlamaStack distribution, and starting up a LlamaStack server. Please see our [documentations](https://github.com/meta-llama/llama-stack/README.md) on what you can do with Llama Stack, and [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main) on examples apps built with Llama Stack. + +## Installation +The `llama` CLI tool helps you setup and use the Llama toolchain & agentic systems. It should be available on your path after installing the `llama-stack` package. 
+ +You can install this repository as a [package](https://pypi.org/project/llama-stack/) with `pip install llama-stack` + +If you want to install from source: + +```bash +mkdir -p ~/local +cd ~/local +git clone git@github.com:meta-llama/llama-stack.git + +conda create -n stack python=3.10 +conda activate stack + +cd llama-stack +$CONDA_PREFIX/bin/pip install -e . +``` + +For what you can do with the Llama CLI, please refer to [CLI Reference](./cli_reference.md). + +## Quick Starting Llama Stack Server + +### Starting up server via docker + +We provide 2 pre-built Docker image of Llama Stack distribution, which can be found in the following links. +- [llamastack-local-gpu](https://hub.docker.com/repository/docker/llamastack/llamastack-local-gpu/general) + - This is a packaged version with our local meta-reference implementations, where you will be running inference locally with downloaded Llama model checkpoints. +- [llamastack-local-cpu](https://hub.docker.com/repository/docker/llamastack/llamastack-local-cpu/general) + - This is a lite version with remote inference where you can hook up to your favourite remote inference framework (e.g. ollama, fireworks, together, tgi) for running inference without GPU. + +> [!NOTE] +> For GPU inference, you need to set these environment variables for specifying local directory containing your model checkpoints, and enable GPU inference to start running docker container. +``` +export LLAMA_CHECKPOINT_DIR=~/.llama +``` + +> [!NOTE] +> `~/.llama` should be the path containing downloaded weights of Llama models. + + +To download and start running a pre-built docker container, you may use the following commands: + +``` +docker run -it -p 5000:5000 -v ~/.llama:/root/.llama --gpus=all llamastack/llamastack-local-gpu +``` + +> [!TIP] +> Pro Tip: We may use `docker compose up` for starting up a distribution with remote providers (e.g. TGI) using [llamastack-local-cpu](https://hub.docker.com/repository/docker/llamastack/llamastack-local-cpu/general). You can checkout [these scripts](https://github.com/meta-llama/llama-stack/llama_stack/distribution/docker/README.md) to help you get started. + +### Build->Configure->Run Llama Stack server via conda +You may also build a LlamaStack distribution from scratch, configure it, and start running the distribution. This is useful for developing on LlamaStack. + +**`llama stack build`** +- You'll be prompted to enter build information interactively. +``` +llama stack build + +> Enter an unique name for identifying your Llama Stack build distribution (e.g. my-local-stack): my-local-stack +> Enter the image type you want your distribution to be built with (docker or conda): conda + + Llama Stack is composed of several APIs working together. Let's configure the providers (implementations) you want to use for these APIs. 
+> Enter the API provider for the inference API: (default=meta-reference): meta-reference +> Enter the API provider for the safety API: (default=meta-reference): meta-reference +> Enter the API provider for the agents API: (default=meta-reference): meta-reference +> Enter the API provider for the memory API: (default=meta-reference): meta-reference +> Enter the API provider for the telemetry API: (default=meta-reference): meta-reference + + > (Optional) Enter a short description for your Llama Stack distribution: + +Build spec configuration saved at ~/.conda/envs/llamastack-my-local-stack/my-local-stack-build.yaml +You can now run `llama stack configure my-local-stack` +``` + +**`llama stack configure`** +- Run `llama stack configure ` with the name you have previously defined in `build` step. +``` +llama stack configure +``` +- You will be prompted to enter configurations for your Llama Stack + +``` +$ llama stack configure my-local-stack + +Could not find my-local-stack. Trying conda build name instead... +Configuring API `inference`... +=== Configuring provider `meta-reference` for API inference... +Enter value for model (default: Llama3.1-8B-Instruct) (required): +Do you want to configure quantization? (y/n): n +Enter value for torch_seed (optional): +Enter value for max_seq_len (default: 4096) (required): +Enter value for max_batch_size (default: 1) (required): + +Configuring API `safety`... +=== Configuring provider `meta-reference` for API safety... +Do you want to configure llama_guard_shield? (y/n): n +Do you want to configure prompt_guard_shield? (y/n): n + +Configuring API `agents`... +=== Configuring provider `meta-reference` for API agents... +Enter `type` for persistence_store (options: redis, sqlite, postgres) (default: sqlite): + +Configuring SqliteKVStoreConfig: +Enter value for namespace (optional): +Enter value for db_path (default: /home/xiyan/.llama/runtime/kvstore.db) (required): + +Configuring API `memory`... +=== Configuring provider `meta-reference` for API memory... +> Please enter the supported memory bank type your provider has for memory: vector + +Configuring API `telemetry`... +=== Configuring provider `meta-reference` for API telemetry... + +> YAML configuration has been written to ~/.llama/builds/conda/my-local-stack-run.yaml. +You can now run `llama stack run my-local-stack --port PORT` +``` + +**`llama stack run`** +- Run `llama stack run ` with the name you have previously defined. +``` +llama stack run my-local-stack + +... +> initializing model parallel with size 1 +> initializing ddp with size 1 +> initializing pipeline with size 1 +... 
+
+Finished model load YES READY
+Serving POST /inference/chat_completion
+Serving POST /inference/completion
+Serving POST /inference/embeddings
+Serving POST /memory_banks/create
+Serving DELETE /memory_bank/documents/delete
+Serving DELETE /memory_banks/drop
+Serving GET /memory_bank/documents/get
+Serving GET /memory_banks/get
+Serving POST /memory_bank/insert
+Serving GET /memory_banks/list
+Serving POST /memory_bank/query
+Serving POST /memory_bank/update
+Serving POST /safety/run_shield
+Serving POST /agentic_system/create
+Serving POST /agentic_system/session/create
+Serving POST /agentic_system/turn/create
+Serving POST /agentic_system/delete
+Serving POST /agentic_system/session/delete
+Serving POST /agentic_system/session/get
+Serving POST /agentic_system/step/get
+Serving POST /agentic_system/turn/get
+Serving GET /telemetry/get_trace
+Serving POST /telemetry/log_event
+Listening on :::5000
+INFO: Started server process [587053]
+INFO: Waiting for application startup.
+INFO: Application startup complete.
+INFO: Uvicorn running on http://[::]:5000 (Press CTRL+C to quit)
+```
+
+### End-to-end flow of building, configuring, running, and testing a Distribution
+
+#### Step 1. Build
+In the following steps, imagine we'll be working with a `Meta-Llama3.1-8B-Instruct` model. We will name our build `8b-instruct` to help us remember the config. We will start building our distribution (in the form of a Conda environment or a Docker image). In this step, we will specify:
+- `name`: the name for our distribution (e.g. `8b-instruct`)
+- `image_type`: our build image type (`conda | docker`)
+- `distribution_spec`: our distribution specs for specifying API providers
+  - `description`: a short description of the configurations for the distribution
+  - `providers`: specifies the underlying implementation for serving each API endpoint
+  - `image_type`: `conda` | `docker` to specify whether to build the distribution in the form of a Docker image or a Conda environment.
+
+
+At the end of the build command, we will generate a `<name>-build.yaml` file storing the build configurations.
+
+After this step is complete, a file named `<name>-build.yaml` will be generated and saved at the output file path printed at the end of the command.
+
+#### Building from scratch
+- For a new user, we could start off by running `llama stack build`, which launches an interactive wizard where you will be prompted to enter build configurations.
+```
+llama stack build
+```
+
+Running the command above will allow you to fill in the configuration to build your Llama Stack distribution; you will see the following output.
+
+```
+> Enter an unique name for identifying your Llama Stack build distribution (e.g. my-local-stack): 8b-instruct
+> Enter the image type you want your distribution to be built with (docker or conda): conda
+
+ Llama Stack is composed of several APIs working together. Let's configure the providers (implementations) you want to use for these APIs.
+
+> Enter the API provider for the inference API: (default=meta-reference): meta-reference
+> Enter the API provider for the safety API: (default=meta-reference): meta-reference
+> Enter the API provider for the agents API: (default=meta-reference): meta-reference
+> Enter the API provider for the memory API: (default=meta-reference): meta-reference
+> Enter the API provider for the telemetry API: (default=meta-reference): meta-reference
+
+ > (Optional) Enter a short description for your Llama Stack distribution:
+
+Build spec configuration saved at ~/.conda/envs/llamastack-my-local-llama-stack/8b-instruct-build.yaml
+```
+
+**Ollama (optional)**
+
+If you plan to use Ollama for inference, you'll need to install the server [via these instructions](https://ollama.com/download).
+
+
+#### Building from templates
+- To build from alternative API providers, we provide distribution templates for users to get started building a distribution backed by different providers.
+
+The following command will allow you to see the available templates and their corresponding providers.
+```
+llama stack build --list-templates
+```
+
+![llama stack build --list-templates output](https://github.com/meta-llama/llama-stack/docs/resources/list-templates.png)
+
+You may then pick a template to build your distribution with providers suited to your liking.
+
+```
+llama stack build --template local-tgi --name my-tgi-stack
+```
+
+```
+$ llama stack build --template local-tgi --name my-tgi-stack
+...
+...
+Build spec configuration saved at ~/.conda/envs/llamastack-my-tgi-stack/my-tgi-stack-build.yaml
+You may now run `llama stack configure my-tgi-stack` or `llama stack configure ~/.conda/envs/llamastack-my-tgi-stack/my-tgi-stack-build.yaml`
+```
+
+#### Building from config file
+- In addition to templates, you may customize the build to your liking by editing a config file and building from it with the following command.
+
+- The config file will have contents like the ones in `llama_stack/distribution/templates/`.
+
+```
+$ cat llama_stack/distribution/templates/local-ollama-build.yaml
+
+name: local-ollama
+distribution_spec:
+  description: Like local, but use ollama for running LLM inference
+  providers:
+    inference: remote::ollama
+    memory: meta-reference
+    safety: meta-reference
+    agents: meta-reference
+    telemetry: meta-reference
+image_type: conda
+```
+
+```
+llama stack build --config llama_stack/distribution/templates/local-ollama-build.yaml
+```
+
+#### How to build a distribution with a Docker image
+
+> [!TIP]
+> Podman is supported as an alternative to Docker. Set `DOCKER_BINARY` to `podman` in your environment to use Podman.
+
+To build a docker image, you may start off from a template and use the `--image-type docker` flag to specify `docker` as the build image type.
+
+```
+llama stack build --template local --image-type docker --name docker-0
+```
+
+Alternatively, you may use a config file and set `image_type` to `docker` in your `<name>-build.yaml` file, and run `llama stack build <name>-build.yaml`.
The `<name>-build.yaml` will have contents like:
+
+```
+name: local-docker-example
+distribution_spec:
+  description: Use code from `llama_stack` itself to serve all llama stack APIs
+  docker_image: null
+  providers:
+    inference: meta-reference
+    memory: meta-reference-faiss
+    safety: meta-reference
+    agentic_system: meta-reference
+    telemetry: console
+image_type: docker
+```
+
+The following command allows you to build a Docker image with the name `<name>`.
+```
+llama stack build --config <name>-build.yaml
+
+Dockerfile created successfully in /tmp/tmp.I0ifS2c46A/Dockerfile
+FROM python:3.10-slim
+WORKDIR /app
+...
+...
+You can run it with: podman run -p 8000:8000 llamastack-docker-local
+Build spec configuration saved at ~/.llama/distributions/docker/docker-local-build.yaml
+```
+
+
+#### Step 2. Configure
+After our distribution is built (either in the form of a Docker image or a Conda environment), we will run the following command to configure it:
+```
+llama stack configure [ <name> | <docker-image-name> | <path/to/config> ]
+```
+- For `conda` environments: `<name>` would be the generated build spec saved from Step 1.
+- For `docker` images downloaded from Dockerhub, you could also use `<docker-image-name>` as the argument.
+  - Run `docker images` to check the list of available images on your machine.
+
+```
+$ llama stack configure 8b-instruct
+
+Configuring API: inference (meta-reference)
+Enter value for model (existing: Meta-Llama3.1-8B-Instruct) (required):
+Enter value for quantization (optional):
+Enter value for torch_seed (optional):
+Enter value for max_seq_len (existing: 4096) (required):
+Enter value for max_batch_size (existing: 1) (required):
+
+Configuring API: memory (meta-reference-faiss)
+
+Configuring API: safety (meta-reference)
+Do you want to configure llama_guard_shield? (y/n): y
+Entering sub-configuration for llama_guard_shield:
+Enter value for model (default: Llama-Guard-3-1B) (required):
+Enter value for excluded_categories (default: []) (required):
+Enter value for disable_input_check (default: False) (required):
+Enter value for disable_output_check (default: False) (required):
+Do you want to configure prompt_guard_shield? (y/n): y
+Entering sub-configuration for prompt_guard_shield:
+Enter value for model (default: Prompt-Guard-86M) (required):
+
+Configuring API: agentic_system (meta-reference)
+Enter value for brave_search_api_key (optional):
+Enter value for bing_search_api_key (optional):
+Enter value for wolfram_api_key (optional):
+
+Configuring API: telemetry (console)
+
+YAML configuration has been written to ~/.llama/builds/conda/8b-instruct-run.yaml
+```
+
+After this step is successful, you should be able to find a run configuration spec in `~/.llama/builds/conda/8b-instruct-run.yaml`. You may edit this file to change the settings.
+
+As you can see, we did basic configuration above and configured:
+- inference to run on model `Meta-Llama3.1-8B-Instruct` (obtained from `llama model list`)
+- Llama Guard safety shield with model `Llama-Guard-3-1B`
+- Prompt Guard safety shield with model `Prompt-Guard-86M`
+
+For how these configurations are stored as yaml, check out the file printed at the end of the configuration.
+
+Note that all configurations as well as models are stored in `~/.llama`.
+
+
+#### Step 3. Run
+Now, let's start the Llama Stack Distribution Server. You will need the YAML configuration file which was written out at the end of the `llama stack configure` step.
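+
+Before running the command below, you can optionally sanity-check that run file from Python. This is a minimal sketch, not part of the llama-stack CLI: it assumes PyYAML is installed and uses the path printed by `llama stack configure` above, so adjust it to match your build name.
+
+```python
+# Minimal sketch: load the generated run configuration and list its sections.
+# Assumes PyYAML is installed (`pip install pyyaml`); the path below is the one
+# printed by `llama stack configure` and may differ on your machine.
+from pathlib import Path
+
+import yaml
+
+run_config_path = Path("~/.llama/builds/conda/8b-instruct-run.yaml").expanduser()
+
+with run_config_path.open() as f:
+    run_config = yaml.safe_load(f)
+
+# The exact schema depends on your llama-stack version, so we only print the
+# top-level keys to confirm the file exists and parses cleanly.
+print(sorted(run_config))
+```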
+
+```
+llama stack run 8b-instruct
+```
+
+You should see the Llama Stack server start and print the APIs that it is supporting:
+
+```
+$ llama stack run 8b-instruct
+
+> initializing model parallel with size 1
+> initializing ddp with size 1
+> initializing pipeline with size 1
+Loaded in 19.28 seconds
+NCCL version 2.20.5+cuda12.4
+Finished model load YES READY
+Serving POST /inference/batch_chat_completion
+Serving POST /inference/batch_completion
+Serving POST /inference/chat_completion
+Serving POST /inference/completion
+Serving POST /safety/run_shield
+Serving POST /agentic_system/memory_bank/attach
+Serving POST /agentic_system/create
+Serving POST /agentic_system/session/create
+Serving POST /agentic_system/turn/create
+Serving POST /agentic_system/delete
+Serving POST /agentic_system/session/delete
+Serving POST /agentic_system/memory_bank/detach
+Serving POST /agentic_system/session/get
+Serving POST /agentic_system/step/get
+Serving POST /agentic_system/turn/get
+Listening on :::5000
+INFO: Started server process [453333]
+INFO: Waiting for application startup.
+INFO: Application startup complete.
+INFO: Uvicorn running on http://[::]:5000 (Press CTRL+C to quit)
+```
+
+> [!NOTE]
+> Configuration is in `~/.llama/builds/local/conda/8b-instruct-run.yaml`. Feel free to increase `max_seq_len`.
+
+> [!IMPORTANT]
+> The "local" distribution inference server currently only supports CUDA. It will not work on Apple Silicon machines.
+
+> [!TIP]
+> You might need to use the flag `--disable-ipv6` to disable IPv6 support.
+
+This server is running a Llama model locally.
+
+#### Step 4. Test with Client
+Once the server is set up, we can test it with a client to see example outputs.
+```
+cd /path/to/llama-stack
+conda activate <env>  # any environment containing the llama-stack pip package will work
+
+python -m llama_stack.apis.inference.client localhost 5000
+```
+
+This will run the chat completion client and query the distribution’s /inference/chat_completion API.
+
+Here is an example output:
+```
+User>hello world, write me a 2 sentence poem about the moon
+Assistant> Here's a 2-sentence poem about the moon:
+
+The moon glows softly in the midnight sky,
+A beacon of wonder, as it passes by.
+```
+
+Similarly, you can test safety (if you configured llama-guard and/or prompt-guard shields) by:
+
+```
+python -m llama_stack.apis.safety.client localhost 5000
+```
+
+
+Check out our client SDKs for connecting to the Llama Stack server in your preferred language; you can choose from [python](https://github.com/meta-llama/llama-stack-client-python), [node](https://github.com/meta-llama/llama-stack-client-node), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) to quickly build your applications.
+
+You can find more example scripts with client SDKs to talk with the Llama Stack server in our [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) repo.
diff --git a/docs/source/index.md b/docs/source/index.md
new file mode 100644
index 000000000..7d95eaf40
--- /dev/null
+++ b/docs/source/index.md
@@ -0,0 +1,40 @@
+# llama-stack documentation
+
+Llama Stack defines and standardizes the building blocks needed to bring generative AI applications to market.
It empowers developers building agentic applications by giving them options to operate in various environments (on-prem, cloud, single-node, on-device) while relying on a standard API interface and a developer experience that is certified by Meta.
+
+The Llama Stack defines and standardizes the building blocks needed to bring generative AI applications to market. These blocks span the entire development lifecycle: from model training and fine-tuning, through product evaluation, to building and running AI agents in production. Beyond definition, we are building providers for the Llama Stack APIs. We are developing open-source versions and partnering with providers, ensuring developers can assemble AI solutions using consistent, interlocking pieces across platforms. The ultimate goal is to accelerate innovation in the AI space.
+
+The Stack APIs are rapidly improving but still very much a work in progress, and we invite feedback as well as direct contributions.
+
+![Llama Stack](../_static/llama-stack.png)
+
+## APIs
+
+The Llama Stack consists of the following set of APIs:
+
+- Inference
+- Safety
+- Memory
+- Agentic System
+- Evaluation
+- Post Training
+- Synthetic Data Generation
+- Reward Scoring
+
+Each API is itself a collection of REST endpoints.
+
+## API Providers
+
+A Provider is what makes the API real -- it provides the actual implementation backing the API.
+
+As an example, for Inference, the implementation could be backed by open-source libraries like [ torch | vLLM | TensorRT ].
+
+A provider can also be just a pointer to a remote REST service -- for example, cloud providers or dedicated inference providers could serve these APIs.
+
+## Distribution
+
+A Distribution is where APIs and Providers are assembled together to provide a consistent whole to the end application developer. You can mix-and-match providers -- some could be backed by local code and some could be remote. As a hobbyist, you can serve a small model locally, but can choose a cloud provider for a large model. Regardless, the higher-level APIs your app needs to work with don't need to change at all. You can even imagine moving across the server / mobile-device boundary as well, always using the same uniform set of APIs for developing generative AI applications.
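+
+Because every distribution exposes the same REST surface, application code stays provider-agnostic. As an illustration only, here is a minimal sketch that calls the `/inference/chat_completion` endpoint of a locally running distribution with Python's `requests` library; the request body shown is an assumption about the wire format, so consult the API reference for the exact schema.
+
+```python
+# Illustrative sketch only: the payload layout below is an assumption, not the
+# documented schema. Point it at whichever distribution you are running locally.
+import requests
+
+response = requests.post(
+    "http://localhost:5000/inference/chat_completion",
+    json={
+        "model": "Llama3.1-8B-Instruct",
+        "messages": [{"role": "user", "content": "Hello!"}],
+        "stream": False,
+    },
+    timeout=60,
+)
+response.raise_for_status()
+print(response.json())
+```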
+ +```{toctree} +cli_reference.md +getting_started.md +``` From 1d241bf3fe7e1fb65b5b0dabbc70aa48e24a4235 Mon Sep 17 00:00:00 2001 From: Dinesh Yeduguru Date: Mon, 21 Oct 2024 22:26:33 -0700 Subject: [PATCH 39/40] add completion() for ollama (#280) --- .../adapters/inference/ollama/ollama.py | 62 ++++++++++++++- .../inference/provider_config_example.yaml | 4 + .../tests/inference/test_inference.py | 5 +- .../utils/inference/openai_compat.py | 75 +++++++++++++++---- .../utils/inference/prompt_adapter.py | 7 ++ 5 files changed, 138 insertions(+), 15 deletions(-) diff --git a/llama_stack/providers/adapters/inference/ollama/ollama.py b/llama_stack/providers/adapters/inference/ollama/ollama.py index 74aed6e5e..b19d54182 100644 --- a/llama_stack/providers/adapters/inference/ollama/ollama.py +++ b/llama_stack/providers/adapters/inference/ollama/ollama.py @@ -23,9 +23,12 @@ from llama_stack.providers.utils.inference.openai_compat import ( OpenAICompatCompletionResponse, process_chat_completion_response, process_chat_completion_stream_response, + process_completion_response, + process_completion_stream_response, ) from llama_stack.providers.utils.inference.prompt_adapter import ( chat_completion_request_to_prompt, + completion_request_to_prompt, ) OLLAMA_SUPPORTED_MODELS = { @@ -93,7 +96,64 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate): stream: Optional[bool] = False, logprobs: Optional[LogProbConfig] = None, ) -> AsyncGenerator: - raise NotImplementedError() + request = CompletionRequest( + model=model, + content=content, + sampling_params=sampling_params, + stream=stream, + logprobs=logprobs, + ) + if stream: + return self._stream_completion(request) + else: + return await self._nonstream_completion(request) + + def _get_params_for_completion(self, request: CompletionRequest) -> dict: + sampling_options = get_sampling_options(request) + # This is needed since the Ollama API expects num_predict to be set + # for early truncation instead of max_tokens. 
+ if sampling_options["max_tokens"] is not None: + sampling_options["num_predict"] = sampling_options["max_tokens"] + return { + "model": OLLAMA_SUPPORTED_MODELS[request.model], + "prompt": completion_request_to_prompt(request, self.formatter), + "options": sampling_options, + "raw": True, + "stream": request.stream, + } + + async def _stream_completion(self, request: CompletionRequest) -> AsyncGenerator: + params = self._get_params_for_completion(request) + + async def _generate_and_convert_to_openai_compat(): + s = await self.client.generate(**params) + async for chunk in s: + choice = OpenAICompatCompletionChoice( + finish_reason=chunk["done_reason"] if chunk["done"] else None, + text=chunk["response"], + ) + yield OpenAICompatCompletionResponse( + choices=[choice], + ) + + stream = _generate_and_convert_to_openai_compat() + async for chunk in process_completion_stream_response(stream, self.formatter): + yield chunk + + async def _nonstream_completion(self, request: CompletionRequest) -> AsyncGenerator: + params = self._get_params_for_completion(request) + r = await self.client.generate(**params) + assert isinstance(r, dict) + + choice = OpenAICompatCompletionChoice( + finish_reason=r["done_reason"] if r["done"] else None, + text=r["response"], + ) + response = OpenAICompatCompletionResponse( + choices=[choice], + ) + + return process_completion_response(response, self.formatter) async def chat_completion( self, diff --git a/llama_stack/providers/tests/inference/provider_config_example.yaml b/llama_stack/providers/tests/inference/provider_config_example.yaml index c4bb4af16..675ece1ea 100644 --- a/llama_stack/providers/tests/inference/provider_config_example.yaml +++ b/llama_stack/providers/tests/inference/provider_config_example.yaml @@ -4,6 +4,10 @@ providers: config: host: localhost port: 11434 + - provider_id: meta-reference + provider_type: meta-reference + config: + model: Llama3.2-1B-Instruct - provider_id: test-tgi provider_type: remote::tgi config: diff --git a/llama_stack/providers/tests/inference/test_inference.py b/llama_stack/providers/tests/inference/test_inference.py index 09d6a69db..afec9a837 100644 --- a/llama_stack/providers/tests/inference/test_inference.py +++ b/llama_stack/providers/tests/inference/test_inference.py @@ -132,7 +132,10 @@ async def test_completion(inference_settings): params = inference_settings["common_params"] provider = inference_impl.routing_table.get_provider_impl(params["model"]) - if provider.__provider_id__ != "meta-reference": + if provider.__provider_spec__.provider_type not in ( + "meta-reference", + "remote::ollama", + ): pytest.skip("Other inference providers don't support completion() yet") response = await inference_impl.completion( diff --git a/llama_stack/providers/utils/inference/openai_compat.py b/llama_stack/providers/utils/inference/openai_compat.py index 72db7b18c..add29da99 100644 --- a/llama_stack/providers/utils/inference/openai_compat.py +++ b/llama_stack/providers/utils/inference/openai_compat.py @@ -34,6 +34,8 @@ def get_sampling_options(request: ChatCompletionRequest) -> dict: if params := request.sampling_params: for attr in {"temperature", "top_p", "top_k", "max_tokens"}: if getattr(params, attr): + if attr == "max_tokens": + options["num_predict"] = getattr(params, attr) options[attr] = getattr(params, attr) if params.repetition_penalty is not None and params.repetition_penalty != 1.0: @@ -49,25 +51,35 @@ def text_from_choice(choice) -> str: return choice.text +def get_stop_reason(finish_reason: str) -> StopReason: + if 
finish_reason in ["stop", "eos"]: + return StopReason.end_of_turn + elif finish_reason == "eom": + return StopReason.end_of_message + elif finish_reason == "length": + return StopReason.out_of_tokens + + return StopReason.out_of_tokens + + +def process_completion_response( + response: OpenAICompatCompletionResponse, formatter: ChatFormat +) -> CompletionResponse: + choice = response.choices[0] + + return CompletionResponse( + stop_reason=get_stop_reason(choice.finish_reason), + content=choice.text, + ) + + def process_chat_completion_response( response: OpenAICompatCompletionResponse, formatter: ChatFormat ) -> ChatCompletionResponse: choice = response.choices[0] - stop_reason = None - if reason := choice.finish_reason: - if reason in ["stop", "eos"]: - stop_reason = StopReason.end_of_turn - elif reason == "eom": - stop_reason = StopReason.end_of_message - elif reason == "length": - stop_reason = StopReason.out_of_tokens - - if stop_reason is None: - stop_reason = StopReason.out_of_tokens - completion_message = formatter.decode_assistant_message_from_content( - text_from_choice(choice), stop_reason + text_from_choice(choice), get_stop_reason(choice.finish_reason) ) return ChatCompletionResponse( completion_message=completion_message, @@ -75,6 +87,43 @@ def process_chat_completion_response( ) +async def process_completion_stream_response( + stream: AsyncGenerator[OpenAICompatCompletionResponse, None], formatter: ChatFormat +) -> AsyncGenerator: + + stop_reason = None + + async for chunk in stream: + choice = chunk.choices[0] + finish_reason = choice.finish_reason + + if finish_reason: + if finish_reason in ["stop", "eos", "eos_token"]: + stop_reason = StopReason.end_of_turn + elif finish_reason == "length": + stop_reason = StopReason.out_of_tokens + break + + text = text_from_choice(choice) + if text == "<|eot_id|>": + stop_reason = StopReason.end_of_turn + text = "" + continue + elif text == "<|eom_id|>": + stop_reason = StopReason.end_of_message + text = "" + continue + yield CompletionResponseStreamChunk( + delta=text, + stop_reason=stop_reason, + ) + + yield CompletionResponseStreamChunk( + delta="", + stop_reason=stop_reason, + ) + + async def process_chat_completion_stream_response( stream: AsyncGenerator[OpenAICompatCompletionResponse, None], formatter: ChatFormat ) -> AsyncGenerator: diff --git a/llama_stack/providers/utils/inference/prompt_adapter.py b/llama_stack/providers/utils/inference/prompt_adapter.py index 5b8ded52c..9d695698f 100644 --- a/llama_stack/providers/utils/inference/prompt_adapter.py +++ b/llama_stack/providers/utils/inference/prompt_adapter.py @@ -23,6 +23,13 @@ from llama_models.sku_list import resolve_model from llama_stack.providers.utils.inference import supported_inference_models +def completion_request_to_prompt( + request: CompletionRequest, formatter: ChatFormat +) -> str: + model_input = formatter.encode_content(request.content) + return formatter.tokenizer.decode(model_input.tokens) + + def chat_completion_request_to_prompt( request: ChatCompletionRequest, formatter: ChatFormat ) -> str: From b279d3bc58d260357aae582f36ddd17f25161953 Mon Sep 17 00:00:00 2001 From: Xi Yan Date: Tue, 22 Oct 2024 08:01:33 -0700 Subject: [PATCH 40/40] Update README.md --- distributions/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/distributions/README.md b/distributions/README.md index 0cb812eb4..dc1e3cc25 100644 --- a/distributions/README.md +++ b/distributions/README.md @@ -6,8 +6,8 @@ A Distribution is where APIs and Providers are 
assembled together to provide a c ## Quick Start Llama Stack Distributions Guide | **Distribution** | **Llama Stack Docker** | Start This Distribution | **Inference** | **Agents** | **Memory** | **Safety** | **Telemetry** | |:----------------: |:------------------------------------------: |:-----------------------: |:------------------: |:------------------: |:------------------: |:------------------: |:------------------: | -| Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](./meta-reference-gpu/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | -| Ollama | [llamastack/distribution-ollama](https://hub.docker.com/repository/docker/llamastack/distribution-ollama/general) | [Guide](./ollama/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | -| TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](./tgi/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | -| Together | [llamastack/distribution-together](https://hub.docker.com/repository/docker/llamastack/distribution-together/general) | [Guide](./together/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | -| Fireworks | [llamastack/distribution-fireworks](https://hub.docker.com/repository/docker/llamastack/distribution-fireworks/general) | [Guide](./fireworks/) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| Meta Reference | [llamastack/distribution-meta-reference-gpu](https://hub.docker.com/repository/docker/llamastack/distribution-meta-reference-gpu/general) | [Guide](./meta-reference-gpu/) | meta-reference | meta-reference | meta-reference; remote::pgvector; remote::chromadb | meta-reference | meta-reference | +| Ollama | [llamastack/distribution-ollama](https://hub.docker.com/repository/docker/llamastack/distribution-ollama/general) | [Guide](./ollama/) | remote::ollama | meta-reference | remote::pgvector; remote::chromadb | remote::ollama | meta-reference | +| TGI | [llamastack/distribution-tgi](https://hub.docker.com/repository/docker/llamastack/distribution-tgi/general) | [Guide](./tgi/) | remote::tgi | meta-reference | meta-reference; remote::pgvector; remote::chromadb | meta-reference | meta-reference | +| Together | [llamastack/distribution-together](https://hub.docker.com/repository/docker/llamastack/distribution-together/general) | [Guide](./together/) | remote::together | meta-reference | remote::weaviate | meta-reference | meta-reference | +| Fireworks | [llamastack/distribution-fireworks](https://hub.docker.com/repository/docker/llamastack/distribution-fireworks/general) | [Guide](./fireworks/) | remote::fireworks | meta-reference | remote::weaviate | meta-reference | meta-reference |