From 42637644937b55ea752991a2c14a0a6a28b72722 Mon Sep 17 00:00:00 2001
From: Ashwin Bharambe
Date: Mon, 7 Oct 2024 06:46:32 -0700
Subject: [PATCH 01/10] Fix adapter_id -> adapter_type for Weaviate

---
 llama_stack/providers/registry/memory.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama_stack/providers/registry/memory.py b/llama_stack/providers/registry/memory.py
index a5f302d4f..a3f0bdb6f 100644
--- a/llama_stack/providers/registry/memory.py
+++ b/llama_stack/providers/registry/memory.py
@@ -59,7 +59,7 @@ def available_providers() -> List[ProviderSpec]:
         remote_provider_spec(
             Api.memory,
             AdapterSpec(
-                adapter_id="weaviate",
+                adapter_type="weaviate",
                 pip_packages=EMBEDDING_DEPS + ["weaviate-client"],
                 module="llama_stack.providers.adapters.memory.weaviate",
                 provider_data_validator="llama_stack.providers.adapters.memory.weaviate.WeaviateRequestProviderData",

From a4e775c465af4c3893302a3617cca72cef0a3e49 Mon Sep 17 00:00:00 2001
From: Russell Bryant
Date: Mon, 7 Oct 2024 11:40:04 -0400
Subject: [PATCH 02/10] download: improve help text (#204)

---
 llama_stack/cli/download.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llama_stack/cli/download.py b/llama_stack/cli/download.py
index 4d0966bb2..a1495cbf0 100644
--- a/llama_stack/cli/download.py
+++ b/llama_stack/cli/download.py
@@ -169,7 +169,7 @@ def run_download_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser):
     meta_url = args.meta_url
     if not meta_url:
         meta_url = input(
-            "Please provide the signed URL you received via email (e.g., https://llama3-1.llamameta.net/*?Policy...): "
+            "Please provide the signed URL you received via email after visiting https://www.llama.com/llama-downloads/ (e.g., https://llama3-1.llamameta.net/*?Policy...): "
         )
     assert meta_url is not None and "llamameta.net" in meta_url
     _meta_download(model, meta_url, info)

From 53d440e952059bdd62736ab6b65df33a1a0773f6 Mon Sep 17 00:00:00 2001
From: Mindaugas
Date: Mon, 7 Oct 2024 18:55:06 +0300
Subject: [PATCH 03/10] Fix ValueError in case chunks are empty (#206)

---
 .../impls/meta_reference/agents/agent_instance.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llama_stack/providers/impls/meta_reference/agents/agent_instance.py b/llama_stack/providers/impls/meta_reference/agents/agent_instance.py
index 9db6b79b5..661da10cc 100644
--- a/llama_stack/providers/impls/meta_reference/agents/agent_instance.py
+++ b/llama_stack/providers/impls/meta_reference/agents/agent_instance.py
@@ -673,7 +673,7 @@ class ChatAgent(ShieldRunnerMixin):
 
     async def _retrieve_context(
         self, session_id: str, messages: List[Message], attachments: List[Attachment]
-    ) -> Tuple[List[str], List[int]]:  # (rag_context, bank_ids)
+    ) -> Tuple[Optional[List[str]], Optional[List[int]]]:  # (rag_context, bank_ids)
         bank_ids = []
 
         memory = self._memory_tool_definition()
@@ -722,12 +722,13 @@ class ChatAgent(ShieldRunnerMixin):
         chunks = [c for r in results for c in r.chunks]
         scores = [s for r in results for s in r.scores]
 
+        if not chunks:
+            return None, bank_ids
+
         # sort by score
         chunks, scores = zip(
             *sorted(zip(chunks, scores), key=lambda x: x[1], reverse=True)
         )
-        if not chunks:
-            return None, bank_ids
 
         tokens = 0
         picked = []

From 2366e188739bc5c65f72644541a80c0a99eb4f49 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Mon, 7 Oct 2024 10:21:26 -0700
Subject: [PATCH 04/10] refactor docs (#209)

---
 CONTRIBUTING.md       |  4 ++--
 README.md             | 17 +++++++++++++++--
 docs/cli_reference.md |  2 +-
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 122080e9c..5948e7110 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,4 +1,4 @@
-# Contributing to Llama-Models
+# Contributing to Llama-Stack
 We want to make contributing to this project as easy and transparent as
 possible.
 
@@ -32,7 +32,7 @@ outlined on that page and do not file a public issue.
 * ...
 
 ## Tips
-* If you are developing with a llama-models repository checked out and need your distribution to reflect changes from there, set `LLAMA_MODELS_DIR` to that dir when running any of the `llama` CLI commands.
+* If you are developing with a llama-stack repository checked out and need your distribution to reflect changes from there, set `LLAMA_STACK_DIR` to that dir when running any of the `llama` CLI commands.
 
 ## License
 By contributing to Llama, you agree that your contributions will be licensed
diff --git a/README.md b/README.md
index a5172ce5c..050a71aff 100644
--- a/README.md
+++ b/README.md
@@ -81,11 +81,24 @@ cd llama-stack
 $CONDA_PREFIX/bin/pip install -e .
 ```
 
-## The Llama CLI
+## Documentations
 
-The `llama` CLI makes it easy to work with the Llama Stack set of tools, including installing and running Distributions, downloading models, studying model prompt formats, etc. Please see the [CLI reference](docs/cli_reference.md) for details. Please see the [Getting Started](docs/getting_started.md) guide for running a Llama Stack server.
+The `llama` CLI makes it easy to work with the Llama Stack set of tools. Please find the following docs for details.
+
+* [CLI reference](docs/cli_reference.md)
+    * Guide using `llama` CLI to work with Llama models (download, study prompts), and building/starting a Llama Stack distribution.
+* [Getting Started](docs/getting_started.md)
+    * Guide to build and run a Llama Stack server.
+* [Contributing](CONTRIBUTING.md)
 
 ## Llama Stack Client SDK
 
+| **Language** | **Client SDK** | **Package** |
+| :----: | :----: | :----: |
+| Python | [llama-stack-client-python](https://github.com/meta-llama/llama-stack-client-python) | [![PyPI version](https://img.shields.io/pypi/v/llama_stack_client.svg)](https://pypi.org/project/llama_stack_client/)
+| Swift | [llama-stack-client-swift](https://github.com/meta-llama/llama-stack-client-swift) |
+| Node | [llama-stack-client-node](https://github.com/meta-llama/llama-stack-client-node) | [![NPM version](https://img.shields.io/npm/v/llama-stack-client.svg)](https://npmjs.org/package/llama-stack-client)
+| Kotlin | [llama-stack-client-kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) |
+
 Check out our client SDKs for connecting to Llama Stack server in your preferred language, you can choose from [python](https://github.com/meta-llama/llama-stack-client-python), [node](https://github.com/meta-llama/llama-stack-client-node), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) programming languages to quickly build your applications.
 
diff --git a/docs/cli_reference.md b/docs/cli_reference.md
index 8e5feeb6b..0b5e73fb9 100644
--- a/docs/cli_reference.md
+++ b/docs/cli_reference.md
@@ -1,6 +1,6 @@
 # Llama CLI Reference
 
-The `llama` CLI tool helps you setup and use the Llama toolchain & agentic systems. It should be available on your path after installing the `llama-stack` package.
+The `llama` CLI tool helps you setup and use the Llama Stack & agentic systems. It should be available on your path after installing the `llama-stack` package.
 
 ### Subcommands
 1. `download`: `llama` cli tools supports downloading the model from Meta or Hugging Face.

From 996efa9b425e7cc6a083b0e66f4e0131dd4c7c2c Mon Sep 17 00:00:00 2001
From: Russell Bryant
Date: Mon, 7 Oct 2024 13:26:52 -0400
Subject: [PATCH 05/10] README.md: Add vLLM to providers table (#207)

Signed-off-by: Russell Bryant
---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 050a71aff..a8a5f5e69 100644
--- a/README.md
+++ b/README.md
@@ -55,6 +55,7 @@ A Distribution is where APIs and Providers are assembled together to provide a c
 | Chroma | Single Node | | | :heavy_check_mark: | | |
 | PG Vector | Single Node | | | :heavy_check_mark: | | |
 | PyTorch ExecuTorch | On-device iOS | :heavy_check_mark: | :heavy_check_mark: | | |
+| [vLLM](https://docs.vllm.ai/en/latest/) | Single Node | | :heavy_check_mark: | | |
 
 ### Distributions
 | **Distribution Provider** | **Docker** | **Inference** | **Memory** | **Safety** | **Telemetry** |

From 16ba0fa06fd166d1c5a8d43fed9c5734a7c34f29 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Mon, 7 Oct 2024 11:24:27 -0700
Subject: [PATCH 06/10] Update README.md

---
 README.md | 1 -
 1 file changed, 1 deletion(-)

diff --git a/README.md b/README.md
index a8a5f5e69..050a71aff 100644
--- a/README.md
+++ b/README.md
@@ -55,7 +55,6 @@ A Distribution is where APIs and Providers are assembled together to provide a c
 | Chroma | Single Node | | | :heavy_check_mark: | | |
 | PG Vector | Single Node | | | :heavy_check_mark: | | |
 | PyTorch ExecuTorch | On-device iOS | :heavy_check_mark: | :heavy_check_mark: | | |
-| [vLLM](https://docs.vllm.ai/en/latest/) | Single Node | | :heavy_check_mark: | | |
 
 ### Distributions
 | **Distribution Provider** | **Docker** | **Inference** | **Memory** | **Safety** | **Telemetry** |

From e4ae09d090eeb793f65efdfdc0e647bf076018f3 Mon Sep 17 00:00:00 2001
From: Yuan Tang
Date: Mon, 7 Oct 2024 22:38:43 -0400
Subject: [PATCH 07/10] Add .idea to .gitignore (#216)

Signed-off-by: Yuan Tang
---
 .gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.gitignore b/.gitignore
index 2465d2d4e..d0a5f0056 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,4 @@ xcuserdata/
 Package.resolved
 *.pte
 *.ipynb_checkpoints*
+.idea

From 4d5f7459aab775464efb4c9adfddd90c5e600ae4 Mon Sep 17 00:00:00 2001
From: Xi Yan
Date: Mon, 7 Oct 2024 19:42:39 -0700
Subject: [PATCH 08/10] [bugfix] Fix logprobs on meta-reference impl (#213)

* fix log probs

* add back LogProbsConfig

* error handling

* bugfix
---
 llama_stack/apis/inference/client.py        | 26 +++++++++++++++----
 .../meta_reference/inference/generation.py  |  2 +-
 .../meta_reference/inference/inference.py   | 15 ++++++++++-
 3 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/llama_stack/apis/inference/client.py b/llama_stack/apis/inference/client.py
index 5cfae633c..fffcf4692 100644
--- a/llama_stack/apis/inference/client.py
+++ b/llama_stack/apis/inference/client.py
@@ -6,7 +6,6 @@
 
 import asyncio
 import json
-import sys
 from typing import Any, AsyncGenerator, List, Optional
 
 import fire
@@ -101,7 +100,9 @@ class InferenceClient(Inference):
             print(f"Error with parsing or validation: {e}")
 
 
-async def run_main(host: str, port: int, stream: bool, model: Optional[str]):
+async def run_main(
+    host: str, port: int, stream: bool, model: Optional[str], logprobs: bool
+):
     client = InferenceClient(f"http://{host}:{port}")
 
     if not model:
@@ -111,13 +112,27 @@ async def run_main(host: str, port: int, stream: bool, model: Optional[str]):
     message = UserMessage(
         content="hello world, write me a 2 sentence poem about the moon"
     )
cprint(f"User>{message.content}", "green") + + if logprobs: + logprobs_config = LogProbConfig( + top_k=1, + ) + else: + logprobs_config = None + iterator = client.chat_completion( model=model, messages=[message], stream=stream, + logprobs=logprobs_config, ) - async for log in EventLogger().log(iterator): - log.print() + + if logprobs: + async for chunk in iterator: + cprint(f"Response: {chunk}", "red") + else: + async for log in EventLogger().log(iterator): + log.print() async def run_mm_main( @@ -149,13 +164,14 @@ def main( port: int, stream: bool = True, mm: bool = False, + logprobs: bool = False, file: Optional[str] = None, model: Optional[str] = None, ): if mm: asyncio.run(run_mm_main(host, port, stream, file, model)) else: - asyncio.run(run_main(host, port, stream, model)) + asyncio.run(run_main(host, port, stream, model, logprobs)) if __name__ == "__main__": diff --git a/llama_stack/providers/impls/meta_reference/inference/generation.py b/llama_stack/providers/impls/meta_reference/inference/generation.py index 4351a3d56..27e086e0f 100644 --- a/llama_stack/providers/impls/meta_reference/inference/generation.py +++ b/llama_stack/providers/impls/meta_reference/inference/generation.py @@ -297,7 +297,7 @@ class Llama: token=next_token[0].item(), text=self.tokenizer.decode(next_token.tolist()), logprobs=( - token_logprobs[:, prev_pos + 1 : cur_pos + 1][0].tolist() + token_logprobs[:, cur_pos : cur_pos + 1][0].tolist() if logprobs else None ), diff --git a/llama_stack/providers/impls/meta_reference/inference/inference.py b/llama_stack/providers/impls/meta_reference/inference/inference.py index e89d8ec4c..dca4ea6fb 100644 --- a/llama_stack/providers/impls/meta_reference/inference/inference.py +++ b/llama_stack/providers/impls/meta_reference/inference/inference.py @@ -132,7 +132,20 @@ class MetaReferenceInferenceImpl(Inference, RoutableProvider): if not request.stream: if request.logprobs: - logprobs.append(token_result.logprob) + assert ( + len(token_result.logprobs) == 1 + ), "Expected logprob to contain 1 result for the current token" + assert ( + request.logprobs.top_k == 1 + ), "Only top_k=1 is supported for LogProbConfig" + + logprobs.append( + TokenLogProbs( + logprobs_by_token={ + token_result.text: token_result.logprobs[0] + } + ) + ) continue From 48d0d2001eaa7b2cb82bef603a2921736c04b657 Mon Sep 17 00:00:00 2001 From: Yuan Tang Date: Tue, 8 Oct 2024 09:55:16 -0400 Subject: [PATCH 09/10] Add classifiers in setup.py (#217) * Add classifiers in setup.py * Update setup.py * Update setup.py --- setup.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ae1f58015..4db636872 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,16 @@ setup( long_description_content_type="text/markdown", url="https://github.com/meta-llama/llama-stack", packages=find_packages(), - classifiers=[], + classifiers=[ + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Operating System :: OS Independent", + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Scientific/Engineering :: Information Analysis", + ], python_requires=">=3.10", install_requires=read_requirements(), include_package_data=True, From 2d4f7d8acfc961b18add259c1f6ef2aef7831a04 Mon Sep 17 00:00:00 2001 From: Dalton Flanagan <6599399+dltn@users.noreply.github.com> Date: Tue, 8 Oct 2024 13:30:40 -0400 Subject: 
[PATCH 10/10] Create SECURITY.md --- SECURITY.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 SECURITY.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 000000000..fc58b67d3 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,5 @@ +# Security Policy + +## Reporting a Vulnerability + +Please report vulnerabilities to our bug bounty program at https://bugbounty.meta.com/