mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-03 18:00:36 +00:00
API Updates: fleshing out RAG APIs, introduce "llama stack" CLI command (#51)
* add tools to chat completion request
* use templates for generating system prompts
* Moved ToolPromptFormat and jinja templates to llama_models.llama3.api
* <WIP> memory changes
- inlined AgenticSystemInstanceConfig so API feels more ergonomic
- renamed it to AgentConfig, AgentInstance -> Agent
- added a MemoryConfig and `memory` parameter
- added `attachments` to input and `output_attachments` to the response
- some naming changes
* InterleavedTextAttachment -> InterleavedTextMedia, introduce memory tool
* flesh out memory banks API
* agentic loop has a RAG implementation
* faiss provider implementation
* memory client works
* re-work tool definitions, fix FastAPI issues, fix tool regressions
* fix agentic_system utils
* basic RAG seems to work
* small bug fixes for inline attachments
* Refactor custom tool execution utilities
* Bug fix, show memory retrieval steps in EventLogger
* No need for api_key for Remote providers
* add special unicode character ↵ to showcase newlines in model prompt templates
* remove api.endpoints imports
* combine datatypes.py and endpoints.py into api.py
* Attachment / add TTL api
* split batch_inference from inference
* minor import fixes
* use a single impl for ChatFormat.decode_assistant_message
* use interleaved_text_media_as_str() utility
* Fix api.datatypes imports
* Add blobfile for tiktoken
* Add ToolPromptFormat to ChatFormat.encode_message so that tools are encoded properly
* templates take optional --format={json,function_tag}
* Rag Updates
* Add `api build` subcommand -- WIP
* fix
* build + run image seems to work
* <WIP> adapters
* bunch more work to make adapters work
* api build works for conda now
* ollama remote adapter works
* Several smaller fixes to make adapters work
Also, reorganized the pattern of __init__ inside providers so
configuration can stay lightweight
* llama distribution -> llama stack + containers (WIP)
* All the new CLI for api + stack work
* Make Fireworks and Together into the Adapter format
* Some quick fixes to the CLI behavior to make it consistent
* Updated README phew
* Update cli_reference.md
* llama_toolchain/distribution -> llama_toolchain/core
* Add termcolor
* update paths
* Add a log just for consistency
* chmod +x scripts
* Fix api dependencies not getting added to configuration
* missing import lol
* Delete utils.py; move to agentic system
* Support downloading of URLs for attachments for code interpreter
* Simplify and generalize `llama api build` yay
* Update `llama stack configure` to be very simple also
* Fix stack start
* Allow building an "adhoc" distribution
* Remove `llama api []` subcommands
* Fixes to llama stack commands and update docs
* Update documentation again and add error messages to llama stack start
* llama stack start -> llama stack run
* Change name of build for less confusion
* Add pyopenapi fork to the repository, update RFC assets
* Remove conflicting annotation
* Added a "--raw" option for model template printing
---------
Co-authored-by: Hardik Shah <hjshah@fb.com>
Co-authored-by: Ashwin Bharambe <ashwin@meta.com>
Co-authored-by: Dalton Flanagan <6599399+dltn@users.noreply.github.com>
This commit is contained in:
parent
35093c0b6f
commit
7bc7785b0d
141 changed files with 8252 additions and 4032 deletions
|
|
@ -8,16 +8,21 @@ import unittest
|
|||
|
||||
from datetime import datetime
|
||||
|
||||
from llama_models.llama3_1.api.datatypes import (
|
||||
from llama_models.llama3.api.datatypes import (
|
||||
BuiltinTool,
|
||||
StopReason,
|
||||
SystemMessage,
|
||||
ToolDefinition,
|
||||
ToolParamDefinition,
|
||||
ToolPromptFormat,
|
||||
ToolResponseMessage,
|
||||
UserMessage,
|
||||
)
|
||||
from llama_toolchain.inference.api.datatypes import ChatCompletionResponseEventType
|
||||
|
||||
from llama_toolchain.inference.api.endpoints import ChatCompletionRequest
|
||||
from llama_toolchain.inference.api import (
|
||||
ChatCompletionRequest,
|
||||
ChatCompletionResponseEventType,
|
||||
)
|
||||
from llama_toolchain.inference.meta_reference.config import MetaReferenceImplConfig
|
||||
from llama_toolchain.inference.meta_reference.inference import get_provider_impl
|
||||
|
||||
|
|
@ -54,52 +59,6 @@ class InferenceTests(unittest.IsolatedAsyncioTestCase):
|
|||
cls.api = await get_provider_impl(config, {})
|
||||
await cls.api.initialize()
|
||||
|
||||
current_date = datetime.now()
|
||||
formatted_date = current_date.strftime("%d %B %Y")
|
||||
cls.system_prompt = SystemMessage(
|
||||
content=textwrap.dedent(
|
||||
f"""
|
||||
Environment: ipython
|
||||
Tools: brave_search
|
||||
|
||||
Cutting Knowledge Date: December 2023
|
||||
Today Date:{formatted_date}
|
||||
|
||||
"""
|
||||
),
|
||||
)
|
||||
cls.system_prompt_with_custom_tool = SystemMessage(
|
||||
content=textwrap.dedent(
|
||||
"""
|
||||
Environment: ipython
|
||||
Tools: brave_search, wolfram_alpha, photogen
|
||||
|
||||
Cutting Knowledge Date: December 2023
|
||||
Today Date: 30 July 2024
|
||||
|
||||
|
||||
You have access to the following functions:
|
||||
|
||||
Use the function 'get_boiling_point' to 'Get the boiling point of a imaginary liquids (eg. polyjuice)'
|
||||
{"name": "get_boiling_point", "description": "Get the boiling point of a imaginary liquids (eg. polyjuice)", "parameters": {"liquid_name": {"param_type": "string", "description": "The name of the liquid", "required": true}, "celcius": {"param_type": "boolean", "description": "Whether to return the boiling point in Celcius", "required": false}}}
|
||||
|
||||
|
||||
Think very carefully before calling functions.
|
||||
If you choose to call a function ONLY reply in the following format with no prefix or suffix:
|
||||
|
||||
<function=example_function_name>{"example_name": "example_value"}</function>
|
||||
|
||||
Reminder:
|
||||
- If looking for real time information use relevant functions before falling back to brave_search
|
||||
- Function calls MUST follow the specified format, start with <function= and end with </function>
|
||||
- Required parameters MUST be specified
|
||||
- Only call one function at a time
|
||||
- Put the entire function call reply on one line
|
||||
|
||||
"""
|
||||
),
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def tearDownClass(cls):
|
||||
# This runs the async teardown function
|
||||
|
|
@ -111,6 +70,22 @@ class InferenceTests(unittest.IsolatedAsyncioTestCase):
|
|||
|
||||
async def asyncSetUp(self):
|
||||
self.valid_supported_model = MODEL
|
||||
self.custom_tool_defn = ToolDefinition(
|
||||
tool_name="get_boiling_point",
|
||||
description="Get the boiling point of a imaginary liquids (eg. polyjuice)",
|
||||
parameters={
|
||||
"liquid_name": ToolParamDefinition(
|
||||
param_type="str",
|
||||
description="The name of the liquid",
|
||||
required=True,
|
||||
),
|
||||
"celcius": ToolParamDefinition(
|
||||
param_type="boolean",
|
||||
description="Whether to return the boiling point in Celcius",
|
||||
required=False,
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
async def test_text(self):
|
||||
request = ChatCompletionRequest(
|
||||
|
|
@ -162,12 +137,12 @@ class InferenceTests(unittest.IsolatedAsyncioTestCase):
|
|||
request = ChatCompletionRequest(
|
||||
model=self.valid_supported_model,
|
||||
messages=[
|
||||
InferenceTests.system_prompt_with_custom_tool,
|
||||
UserMessage(
|
||||
content="Use provided function to find the boiling point of polyjuice in fahrenheit?",
|
||||
),
|
||||
],
|
||||
stream=False,
|
||||
tools=[self.custom_tool_defn],
|
||||
)
|
||||
iterator = InferenceTests.api.chat_completion(request)
|
||||
async for r in iterator:
|
||||
|
|
@ -197,11 +172,11 @@ class InferenceTests(unittest.IsolatedAsyncioTestCase):
|
|||
request = ChatCompletionRequest(
|
||||
model=self.valid_supported_model,
|
||||
messages=[
|
||||
self.system_prompt,
|
||||
UserMessage(
|
||||
content="Who is the current US President?",
|
||||
),
|
||||
],
|
||||
tools=[ToolDefinition(tool_name=BuiltinTool.brave_search)],
|
||||
stream=True,
|
||||
)
|
||||
iterator = InferenceTests.api.chat_completion(request)
|
||||
|
|
@ -227,17 +202,20 @@ class InferenceTests(unittest.IsolatedAsyncioTestCase):
|
|||
request = ChatCompletionRequest(
|
||||
model=self.valid_supported_model,
|
||||
messages=[
|
||||
InferenceTests.system_prompt_with_custom_tool,
|
||||
UserMessage(
|
||||
content="Use provided function to find the boiling point of polyjuice?",
|
||||
),
|
||||
],
|
||||
stream=True,
|
||||
tools=[self.custom_tool_defn],
|
||||
tool_prompt_format=ToolPromptFormat.function_tag,
|
||||
)
|
||||
iterator = InferenceTests.api.chat_completion(request)
|
||||
events = []
|
||||
async for chunk in iterator:
|
||||
# print(f"{chunk.event.event_type:<40} | {str(chunk.event.stop_reason):<26} | {chunk.event.delta} ")
|
||||
# print(
|
||||
# f"{chunk.event.event_type:<40} | {str(chunk.event.stop_reason):<26} | {chunk.event.delta} "
|
||||
# )
|
||||
events.append(chunk.event)
|
||||
|
||||
self.assertEqual(events[0].event_type, ChatCompletionResponseEventType.start)
|
||||
|
|
@ -257,7 +235,6 @@ class InferenceTests(unittest.IsolatedAsyncioTestCase):
|
|||
request = ChatCompletionRequest(
|
||||
model=self.valid_supported_model,
|
||||
messages=[
|
||||
self.system_prompt,
|
||||
UserMessage(
|
||||
content="Search the web and tell me who the "
|
||||
"44th president of the United States was",
|
||||
|
|
@ -270,6 +247,7 @@ class InferenceTests(unittest.IsolatedAsyncioTestCase):
|
|||
),
|
||||
],
|
||||
stream=True,
|
||||
tools=[ToolDefinition(tool_name=BuiltinTool.brave_search)],
|
||||
)
|
||||
iterator = self.api.chat_completion(request)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue