Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-07-29 07:14:20 +00:00)
[wip] client w/ stainless sdk
commit ee32de4c3f
parent 741310f78e
2 changed files with 81 additions and 0 deletions
llama_toolchain/agentic_system/client_sdk.py (new file, 49 lines)

@@ -0,0 +1,49 @@
import fire
from llama_stack import LlamaStack
from llama_stack.types import UserMessage


def main(host: str, port: int):
    client = LlamaStack(
        base_url=f"http://{host}:{port}",
    )

    # Need something like this to work with the server; however, this is not what was generated by the SDK
    # response = client.inference.chat_completion(
    #     request={
    #         "messages": [
    #             UserMessage(content="hello world, troll me in two-paragraphs about 42", role="user"),
    #         ],
    #         "model": "Meta-Llama3.1-8B-Instruct",
    #         "stream": True,
    #     },
    # )

    agentic_system_create_response = client.agentic_system.create(
        agent_config={
            "instructions": "You are a helpful assistant",
            "model": "Meta-Llama3.1-8B-Instruct",
        },
    )
    print(agentic_system_create_response)

    agentic_system_create_session_response = client.agentic_system.sessions.create(
        agent_id=agentic_system_create_response.agent_id,
        session_name="test_session",
    )
    print(agentic_system_create_session_response)

    # TODO(xiyan): This does not work with the current server; need to wrap it in a request (similar to AgentConfig?)
    response = client.agentic_system.turns.create(
        agent_id=agentic_system_create_response.agent_id,
        session_id=agentic_system_create_session_response.session_id,
        messages=[
            UserMessage(content="What is the capital of France?", role="user"),
        ],
        stream=True,
    )

    # print(response)


if __name__ == "__main__":
    fire.Fire(main)
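Note: since the turn is created with stream=True, the stainless-generated client presumably returns an iterable of streamed events rather than a single response object. A minimal sketch of how one might consume it, assuming the response is a plain iterator (the event types and payload shape are not confirmed by this commit):

    # Assumption: the streaming turn response can be iterated chunk-by-chunk;
    # the chunk structure below is a placeholder, not the SDK's confirmed API.
    for chunk in response:
        print(chunk)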
llama_toolchain/inference/client_sdk.py (new file, 32 lines)

@@ -0,0 +1,32 @@
import fire
from llama_stack import LlamaStack
from llama_stack.types import UserMessage


def main(host: str, port: int):
    client = LlamaStack(
        base_url=f"http://{host}:{port}",
    )

    # Need something like this to work with the server; however, this is not what was generated by the SDK (?)
    response = client.inference.chat_completion(
        request={
            "messages": [
                UserMessage(content="hello world, troll me in two-paragraphs about 42", role="user"),
            ],
            "model": "Meta-Llama3.1-8B-Instruct",
            "stream": False,
        },
    )

    print(response)

    # This does not work with the current server
    # response = client.inference.chat_completion(
    #     messages=[
    #         UserMessage(content="hello world, troll me in two-paragraphs about 42", role="user"),
    #     ],
    #     model="Meta-Llama3.1-8B-Instruct",
    #     stream=True,
    # )


if __name__ == "__main__":
    fire.Fire(main)
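For reference, fire.Fire(main) exposes main(host, port) as a command-line entry point, with positional CLI arguments mapped onto the function parameters. A hypothetical equivalent call (the host and port values here are examples only, not taken from this commit):

    # Roughly what happens when the script is run as:
    #   python llama_toolchain/inference/client_sdk.py localhost 5000
    # fire parses the CLI arguments and passes them straight through to main()
    main(host="localhost", port=5000)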