diff --git a/.gitignore b/.gitignore
index 7b8321844..cc4da1a60 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,3 +14,4 @@ Package.resolved
 *.pte
 *.ipynb_checkpoints*
 .venv/
+.idea
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 122080e9c..5948e7110 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,4 +1,4 @@
-# Contributing to Llama-Models
+# Contributing to Llama-Stack
 We want to make contributing to this project as easy and transparent as
 possible.

@@ -32,7 +32,7 @@ outlined on that page and do not file a public issue.
 * ...

 ## Tips
-* If you are developing with a llama-models repository checked out and need your distribution to reflect changes from there, set `LLAMA_MODELS_DIR` to that dir when running any of the `llama` CLI commands.
+* If you are developing with a llama-stack repository checked out and need your distribution to reflect changes from there, set `LLAMA_STACK_DIR` to that dir when running any of the `llama` CLI commands.

 ## License
 By contributing to Llama, you agree that your contributions will be licensed
diff --git a/README.md b/README.md
index a5172ce5c..050a71aff 100644
--- a/README.md
+++ b/README.md
@@ -81,11 +81,24 @@ cd llama-stack
 $CONDA_PREFIX/bin/pip install -e .
 ```

-## The Llama CLI
+## Documentation

-The `llama` CLI makes it easy to work with the Llama Stack set of tools, including installing and running Distributions, downloading models, studying model prompt formats, etc. Please see the [CLI reference](docs/cli_reference.md) for details. Please see the [Getting Started](docs/getting_started.md) guide for running a Llama Stack server.
+The `llama` CLI makes it easy to work with the Llama Stack set of tools. See the following docs for details.
+
+* [CLI reference](docs/cli_reference.md)
+  * Guide to using the `llama` CLI to work with Llama models (download, study prompts) and to building/starting a Llama Stack distribution.
+* [Getting Started](docs/getting_started.md)
+  * Guide to building and running a Llama Stack server.
+* [Contributing](CONTRIBUTING.md)

 ## Llama Stack Client SDK

+| **Language** | **Client SDK** | **Package** |
+| :----: | :----: | :----: |
+| Python | [llama-stack-client-python](https://github.com/meta-llama/llama-stack-client-python) | [![PyPI version](https://img.shields.io/pypi/v/llama_stack_client.svg)](https://pypi.org/project/llama_stack_client/)
+| Swift | [llama-stack-client-swift](https://github.com/meta-llama/llama-stack-client-swift) |
+| Node | [llama-stack-client-node](https://github.com/meta-llama/llama-stack-client-node) | [![NPM version](https://img.shields.io/npm/v/llama-stack-client.svg)](https://npmjs.org/package/llama-stack-client)
+| Kotlin | [llama-stack-client-kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) |
+
 Check out our client SDKs for connecting to Llama Stack server in your preferred language, you can choose from [python](https://github.com/meta-llama/llama-stack-client-python), [node](https://github.com/meta-llama/llama-stack-client-node), [swift](https://github.com/meta-llama/llama-stack-client-swift), and [kotlin](https://github.com/meta-llama/llama-stack-client-kotlin) programming languages to quickly build your applications.
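The `LLAMA_STACK_DIR` tip in the CONTRIBUTING.md hunk above is easier to follow with a concrete invocation. A minimal sketch, where the checkout path and the `llama stack build` subcommand are used purely for illustration (neither comes from this patch):

```python
# Illustration only: run a `llama` CLI command against a local llama-stack
# checkout by setting LLAMA_STACK_DIR in the child process environment.
# The checkout path and the subcommand are assumptions, not part of this patch.
import os
import subprocess

env = dict(os.environ, LLAMA_STACK_DIR="/path/to/your/llama-stack")  # assumed path
subprocess.run(["llama", "stack", "build"], env=env, check=True)  # assumed subcommand
```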
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 000000000..fc58b67d3
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,5 @@
+# Security Policy
+
+## Reporting a Vulnerability
+
+Please report vulnerabilities to our bug bounty program at https://bugbounty.meta.com/
diff --git a/docs/cli_reference.md b/docs/cli_reference.md
index 8e5feeb6b..0b5e73fb9 100644
--- a/docs/cli_reference.md
+++ b/docs/cli_reference.md
@@ -1,6 +1,6 @@
 # Llama CLI Reference

-The `llama` CLI tool helps you setup and use the Llama toolchain & agentic systems. It should be available on your path after installing the `llama-stack` package.
+The `llama` CLI tool helps you set up and use the Llama Stack & agentic systems. It should be available on your path after installing the `llama-stack` package.

 ### Subcommands
 1. `download`: `llama` cli tools supports downloading the model from Meta or Hugging Face.
diff --git a/llama_stack/apis/inference/client.py b/llama_stack/apis/inference/client.py
index 5cfae633c..fffcf4692 100644
--- a/llama_stack/apis/inference/client.py
+++ b/llama_stack/apis/inference/client.py
@@ -6,7 +6,6 @@
 import asyncio
 import json
-import sys

 from typing import Any, AsyncGenerator, List, Optional

 import fire
@@ -101,7 +100,9 @@ class InferenceClient(Inference):
             print(f"Error with parsing or validation: {e}")


-async def run_main(host: str, port: int, stream: bool, model: Optional[str]):
+async def run_main(
+    host: str, port: int, stream: bool, model: Optional[str], logprobs: bool
+):
     client = InferenceClient(f"http://{host}:{port}")

     if not model:
@@ -111,13 +112,27 @@ async def run_main(host: str, port: int, stream: bool, model: Optional[str]):
         content="hello world, write me a 2 sentence poem about the moon"
     )
     cprint(f"User>{message.content}", "green")
+
+    if logprobs:
+        logprobs_config = LogProbConfig(
+            top_k=1,
+        )
+    else:
+        logprobs_config = None
+
     iterator = client.chat_completion(
         model=model,
         messages=[message],
         stream=stream,
+        logprobs=logprobs_config,
     )
-    async for log in EventLogger().log(iterator):
-        log.print()
+
+    if logprobs:
+        async for chunk in iterator:
+            cprint(f"Response: {chunk}", "red")
+    else:
+        async for log in EventLogger().log(iterator):
+            log.print()


 async def run_mm_main(
@@ -149,13 +164,14 @@ def main(
     port: int,
     stream: bool = True,
     mm: bool = False,
+    logprobs: bool = False,
     file: Optional[str] = None,
     model: Optional[str] = None,
 ):
     if mm:
         asyncio.run(run_mm_main(host, port, stream, file, model))
     else:
-        asyncio.run(run_main(host, port, stream, model))
+        asyncio.run(run_main(host, port, stream, model, logprobs))


 if __name__ == "__main__":
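The client.py hunks above thread a new `logprobs` flag from the CLI entry point into `LogProbConfig(top_k=1)`. A minimal sketch of driving the updated `run_main` directly; the server address and model identifier are assumptions for illustration, and a Llama Stack server is assumed to already be running:

```python
# Sketch only: exercise the new logprobs path of run_main() directly.
# Assumes a Llama Stack server is already listening on localhost:5000 and that
# the model identifier below is served there (both are assumptions).
import asyncio

from llama_stack.apis.inference.client import run_main

asyncio.run(
    run_main(
        host="localhost",
        port=5000,
        stream=False,  # the server-side TokenLogProbs change covers the non-streaming path
        model="Llama3.1-8B-Instruct",  # assumed model identifier
        logprobs=True,  # new flag: builds LogProbConfig(top_k=1) inside run_main
    )
)
```

This is roughly what the `fire`-based `main()` does when the script is run with `--logprobs=True` on the command line.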
diff --git a/llama_stack/cli/download.py b/llama_stack/cli/download.py
index 4d0966bb2..a1495cbf0 100644
--- a/llama_stack/cli/download.py
+++ b/llama_stack/cli/download.py
@@ -169,7 +169,7 @@ def run_download_cmd(args: argparse.Namespace, parser: argparse.ArgumentParser):
     meta_url = args.meta_url
     if not meta_url:
         meta_url = input(
-            "Please provide the signed URL you received via email (e.g., https://llama3-1.llamameta.net/*?Policy...): "
+            "Please provide the signed URL you received via email after visiting https://www.llama.com/llama-downloads/ (e.g., https://llama3-1.llamameta.net/*?Policy...): "
         )
     assert meta_url is not None and "llamameta.net" in meta_url
     _meta_download(model, meta_url, info)
diff --git a/llama_stack/providers/impls/meta_reference/agents/agent_instance.py b/llama_stack/providers/impls/meta_reference/agents/agent_instance.py
index 9db6b79b5..661da10cc 100644
--- a/llama_stack/providers/impls/meta_reference/agents/agent_instance.py
+++ b/llama_stack/providers/impls/meta_reference/agents/agent_instance.py
@@ -673,7 +673,7 @@ class ChatAgent(ShieldRunnerMixin):
     async def _retrieve_context(
         self, session_id: str, messages: List[Message], attachments: List[Attachment]
-    ) -> Tuple[List[str], List[int]]:  # (rag_context, bank_ids)
+    ) -> Tuple[Optional[List[str]], Optional[List[int]]]:  # (rag_context, bank_ids)
         bank_ids = []

         memory = self._memory_tool_definition()

@@ -722,12 +722,13 @@ class ChatAgent(ShieldRunnerMixin):
         chunks = [c for r in results for c in r.chunks]
         scores = [s for r in results for s in r.scores]

+        if not chunks:
+            return None, bank_ids
+
         # sort by score
         chunks, scores = zip(
             *sorted(zip(chunks, scores), key=lambda x: x[1], reverse=True)
         )
-        if not chunks:
-            return None, bank_ids

         tokens = 0
         picked = []
diff --git a/llama_stack/providers/impls/meta_reference/inference/generation.py b/llama_stack/providers/impls/meta_reference/inference/generation.py
index 4351a3d56..27e086e0f 100644
--- a/llama_stack/providers/impls/meta_reference/inference/generation.py
+++ b/llama_stack/providers/impls/meta_reference/inference/generation.py
@@ -297,7 +297,7 @@ class Llama:
                 token=next_token[0].item(),
                 text=self.tokenizer.decode(next_token.tolist()),
                 logprobs=(
-                    token_logprobs[:, prev_pos + 1 : cur_pos + 1][0].tolist()
+                    token_logprobs[:, cur_pos : cur_pos + 1][0].tolist()
                     if logprobs
                     else None
                 ),
diff --git a/llama_stack/providers/impls/meta_reference/inference/inference.py b/llama_stack/providers/impls/meta_reference/inference/inference.py
index e89d8ec4c..dca4ea6fb 100644
--- a/llama_stack/providers/impls/meta_reference/inference/inference.py
+++ b/llama_stack/providers/impls/meta_reference/inference/inference.py
@@ -132,7 +132,20 @@ class MetaReferenceInferenceImpl(Inference, RoutableProvider):

                 if not request.stream:
                     if request.logprobs:
-                        logprobs.append(token_result.logprob)
+                        assert (
+                            len(token_result.logprobs) == 1
+                        ), "Expected logprob to contain 1 result for the current token"
+                        assert (
+                            request.logprobs.top_k == 1
+                        ), "Only top_k=1 is supported for LogProbConfig"
+
+                        logprobs.append(
+                            TokenLogProbs(
+                                logprobs_by_token={
+                                    token_result.text: token_result.logprobs[0]
+                                }
+                            )
+                        )

                     continue

diff --git a/llama_stack/providers/registry/memory.py b/llama_stack/providers/registry/memory.py
index a5f302d4f..a3f0bdb6f 100644
--- a/llama_stack/providers/registry/memory.py
+++ b/llama_stack/providers/registry/memory.py
@@ -59,7 +59,7 @@ def available_providers() -> List[ProviderSpec]:
         remote_provider_spec(
             Api.memory,
             AdapterSpec(
-                adapter_id="weaviate",
+                adapter_type="weaviate",
                 pip_packages=EMBEDDING_DEPS + ["weaviate-client"],
                 module="llama_stack.providers.adapters.memory.weaviate",
                 provider_data_validator="llama_stack.providers.adapters.memory.weaviate.WeaviateRequestProviderData",
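One ordering detail from the agent_instance.py hunk above is worth spelling out: the empty-`chunks` guard had to move ahead of the sort because unpacking `zip(*sorted(...))` on empty inputs raises before the old guard was ever reached. A standalone illustration, with plain lists standing in for the agent's real chunk and score objects:

```python
# Standalone illustration of why the early return moved above the sort.
# Plain lists stand in for the agent's retrieved chunks and scores.
chunks, scores = [], []

# Old order: sort first, check afterwards. With nothing retrieved, unpacking
# the empty zip raises ValueError, so the old `if not chunks` guard was dead code.
try:
    chunks, scores = zip(
        *sorted(zip(chunks, scores), key=lambda x: x[1], reverse=True)
    )
except ValueError as err:
    print(f"old order fails on empty results: {err}")

# New order: guard first, so the method can return (None, bank_ids) before sorting.
if not chunks:
    print("new order returns early with (None, bank_ids)")
```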
diff --git a/setup.py b/setup.py
index ae1f58015..4db636872 100644
--- a/setup.py
+++ b/setup.py
@@ -30,7 +30,16 @@ setup(
     long_description_content_type="text/markdown",
     url="https://github.com/meta-llama/llama-stack",
     packages=find_packages(),
-    classifiers=[],
+    classifiers=[
+        "License :: OSI Approved :: MIT License",
+        "Programming Language :: Python :: 3",
+        "Operating System :: OS Independent",
+        "Intended Audience :: Developers",
+        "Intended Audience :: Information Technology",
+        "Intended Audience :: Science/Research",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
+        "Topic :: Scientific/Engineering :: Information Analysis",
+    ],
     python_requires=">=3.10",
     install_requires=read_requirements(),
     include_package_data=True,
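Finally, the generation.py hunk narrows the returned logprobs slice to just the current token, which is what the new `len(token_result.logprobs) == 1` assertion in inference.py expects. A rough illustration with a dummy tensor; the shape, values, and positions below are made up for the example:

```python
# Dummy-tensor illustration of the generation.py slice change. The [1, seq_len]
# shape mirrors a batch-of-one layout; values and positions are made up.
import torch

token_logprobs = torch.tensor([[-0.5, -1.0, -2.0, -0.25, -1.5]])
prev_pos, cur_pos = 0, 3  # e.g. yielding the token at position 3 after a prefill from 0

old = token_logprobs[:, prev_pos + 1 : cur_pos + 1][0].tolist()  # [-1.0, -2.0, -0.25]
new = token_logprobs[:, cur_pos : cur_pos + 1][0].tolist()  # [-0.25]

print(old)  # several values for a single yielded token
print(new)  # exactly one value for the current token
```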