diff --git a/docs/source/getting_started/index.md b/docs/source/getting_started/index.md index a71660fff..6a18972b1 100644 --- a/docs/source/getting_started/index.md +++ b/docs/source/getting_started/index.md @@ -62,7 +62,7 @@ llama-stack-client --endpoint http://localhost:$LLAMA_STACK_PORT models list You can test basic Llama inference completion using the CLI too. ```bash llama-stack-client --endpoint http://localhost:$LLAMA_STACK_PORT \ - inference chat-completion \ + inference chat_completion \ --message "hello, what model are you?" ``` @@ -118,6 +118,7 @@ async def run_main(): model=os.environ["INFERENCE_MODEL"], instructions="You are a helpful assistant", tools=[{"type": "memory"}], # enable Memory aka RAG + enable_session_persistence=True, ) agent = Agent(client, agent_config)