Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-06-28 02:53:30 +00:00
Fix precommit check after moving to ruff (#927)
The lint check on the main branch is failing. This fixes the check after the move to ruff in https://github.com/meta-llama/llama-stack/pull/921: we need to move to a `ruff.toml` file, fix some newly flagged issues, and ignore some additional checks.

Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>
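For readers unfamiliar with ruff's standalone configuration: a `ruff.toml` mirrors the `[tool.ruff]` tables of `pyproject.toml`, minus the `tool.ruff` prefix. The sketch below is a minimal illustration of that shape, not the actual file from this PR; the rule selections and the `line-length` value are assumptions.

```toml
# Illustrative ruff.toml (not the file shipped in this PR).
line-length = 120

[lint]
# A small base rule set; real projects extend this over time.
select = ["E", "F", "I"]
# Rules deliberately ignored rather than fixed immediately.
ignore = ["E501"]

[format]
# Also format Python code embedded in docstrings.
docstring-code-format = true
```

With such a file at the repository root, `ruff check --fix .` and `ruff format .` pick up the configuration automatically.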
parent 4773092dd1 · commit 34ab7a3b6c
217 changed files with 981 additions and 2681 deletions
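As background for the "precommit check" in the title: ruff usually runs in CI through pre-commit hooks. The snippet below is a generic example of that wiring, not the configuration from this repository; the pinned `rev` is an assumption.

```yaml
# Generic ruff wiring for .pre-commit-config.yaml (not taken from this repo).
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.9.4  # assumed pin; use the release matching your ruff version
    hooks:
      - id: ruff          # lint, applying safe autofixes
        args: [--fix]
      - id: ruff-format   # format, replacing black
```

Running `pre-commit run --all-files` locally then reproduces what the failing CI job checks.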
````diff
@@ -77,7 +77,7 @@ agent_config = AgentConfig(
     instructions="You are a helpful assistant",
     # Enable both RAG and tool usage
     toolgroups=[
-        {"name": "builtin::rag", "args": {"vector_db_ids": ["my_docs"]}}.
+        {"name": "builtin::rag", "args": {"vector_db_ids": ["my_docs"]}},
         "builtin::code_interpreter",
     ],
     # Configure safety
@@ -86,13 +86,9 @@ agent_config = AgentConfig(
     # Control the inference loop
     max_infer_iters=5,
     sampling_params={
-        "strategy": {
-            "type": "top_p",
-            "temperature": 0.7,
-            "top_p": 0.95
-        },
-        "max_tokens": 2048
-    }
+        "strategy": {"type": "top_p", "temperature": 0.7, "top_p": 0.95},
+        "max_tokens": 2048,
+    },
 )

 agent = Agent(client, agent_config)
@@ -101,11 +97,13 @@ session_id = agent.create_session("monitored_session")
 # Stream the agent's execution steps
 response = agent.create_turn(
     messages=[{"role": "user", "content": "Analyze this code and run it"}],
-    attachments=[{
-        "content": "https://raw.githubusercontent.com/example/code.py",
-        "mime_type": "text/plain"
-    }],
-    session_id=session_id
+    attachments=[
+        {
+            "content": "https://raw.githubusercontent.com/example/code.py",
+            "mime_type": "text/plain",
+        }
+    ],
+    session_id=session_id,
 )

 # Monitor each step of execution
````
````diff
@@ -15,6 +15,7 @@ This first example walks you through how to evaluate a model candidate served by

 ```python
 import datasets
+
 ds = datasets.load_dataset(path="llamastack/mmmu", name="Agriculture", split="dev")
 ds = ds.select_columns(["chat_completion_input", "input_query", "expected_answer"])
 eval_rows = ds.to_pandas().to_dict(orient="records")
@@ -43,7 +44,7 @@ system_message = {
 client.eval_tasks.register(
     eval_task_id="meta-reference::mmmu",
     dataset_id=f"mmmu-{subset}-{split}",
-    scoring_functions=["basic::regex_parser_multiple_choice_answer"]
+    scoring_functions=["basic::regex_parser_multiple_choice_answer"],
 )

 response = client.eval.evaluate_rows(
@@ -62,9 +63,9 @@ response = client.eval.evaluate_rows(
                 "max_tokens": 4096,
                 "repeat_penalty": 1.0,
             },
-            "system_message": system_message
-        }
-    }
+            "system_message": system_message,
+        },
+    },
 )
 ```

@@ -88,7 +89,7 @@ _ = client.datasets.register(
         "input_query": {"type": "string"},
         "expected_answer": {"type": "string"},
         "chat_completion_input": {"type": "chat_completion_input"},
-    }
+    },
 )

 eval_rows = client.datasetio.get_rows_paginated(
@@ -101,7 +102,7 @@ eval_rows = client.datasetio.get_rows_paginated(
 client.eval_tasks.register(
     eval_task_id="meta-reference::simpleqa",
     dataset_id=simpleqa_dataset_id,
-    scoring_functions=["llm-as-judge::405b-simpleqa"]
+    scoring_functions=["llm-as-judge::405b-simpleqa"],
 )

 response = client.eval.evaluate_rows(
@@ -120,8 +121,8 @@ response = client.eval.evaluate_rows(
             "max_tokens": 4096,
             "repeat_penalty": 1.0,
         },
-    }
-}
+    },
+},
 )
 ```

@@ -144,14 +145,14 @@ agent_config = {
         {
             "type": "brave_search",
             "engine": "tavily",
-            "api_key": userdata.get("TAVILY_SEARCH_API_KEY")
+            "api_key": userdata.get("TAVILY_SEARCH_API_KEY"),
         }
     ],
     "tool_choice": "auto",
     "tool_prompt_format": "json",
     "input_shields": [],
     "output_shields": [],
-    "enable_session_persistence": False
+    "enable_session_persistence": False,
 }

 response = client.eval.evaluate_rows(
@@ -163,7 +164,7 @@ response = client.eval.evaluate_rows(
         "eval_candidate": {
             "type": "agent",
             "config": agent_config,
-        }
-    }
+        },
+    },
 )
 ```
````
````diff
@@ -13,7 +13,7 @@ Here's how to set up basic evaluation:
 response = client.eval_tasks.register(
     eval_task_id="my_eval",
     dataset_id="my_dataset",
-    scoring_functions=["accuracy", "relevance"]
+    scoring_functions=["accuracy", "relevance"],
 )

 # Run evaluation
@@ -21,16 +21,10 @@ job = client.eval.run_eval(
     task_id="my_eval",
     task_config={
         "type": "app",
-        "eval_candidate": {
-            "type": "agent",
-            "config": agent_config
-        }
-    }
+        "eval_candidate": {"type": "agent", "config": agent_config},
+    },
 )

 # Get results
-result = client.eval.job_result(
-    task_id="my_eval",
-    job_id=job.job_id
-)
+result = client.eval.job_result(task_id="my_eval", job_id=job.job_id)
 ```
````
````diff
@@ -34,15 +34,16 @@ chunks = [
     {
         "document_id": "doc1",
         "content": "Your document text here",
-        "mime_type": "text/plain"
+        "mime_type": "text/plain",
     },
-    ...
+    ...,
 ]
 client.vector_io.insert(vector_db_id, chunks)

 # You can then query for these chunks
-chunks_response = client.vector_io.query(vector_db_id, query="What do you know about...")
-
+chunks_response = client.vector_io.query(
+    vector_db_id, query="What do you know about..."
+)
 ```

 ### Using the RAG Tool
@@ -81,7 +82,6 @@ results = client.tool_runtime.rag_tool.query(
 One of the most powerful patterns is combining agents with RAG capabilities. Here's a complete example:

 ```python
-
 # Configure agent with memory
 agent_config = AgentConfig(
     model="Llama3.2-3B-Instruct",
@@ -91,9 +91,9 @@ agent_config = AgentConfig(
             "name": "builtin::rag",
             "args": {
                 "vector_db_ids": [vector_db_id],
-            }
+            },
         }
-    ]
+    ],
 )

 agent = Agent(client, agent_config)
@@ -101,25 +101,21 @@ session_id = agent.create_session("rag_session")

 # Initial document ingestion
 response = agent.create_turn(
-    messages=[{
-        "role": "user",
-        "content": "I am providing some documents for reference."
-    }],
+    messages=[
+        {"role": "user", "content": "I am providing some documents for reference."}
+    ],
     documents=[
         dict(
             content="https://raw.githubusercontent.com/example/doc.rst",
-            mime_type="text/plain"
+            mime_type="text/plain",
         )
     ],
-    session_id=session_id
+    session_id=session_id,
 )

 # Query with RAG
 response = agent.create_turn(
-    messages=[{
-        "role": "user",
-        "content": "What are the key topics in the documents?"
-    }],
-    session_id=session_id
+    messages=[{"role": "user", "content": "What are the key topics in the documents?"}],
+    session_id=session_id,
 )
 ```
````
````diff
@@ -5,15 +5,11 @@ Safety is a critical component of any AI application. Llama Stack provides a Shi
 ```python
 # Register a safety shield
 shield_id = "content_safety"
-client.shields.register(
-    shield_id=shield_id,
-    provider_shield_id="llama-guard-basic"
-)
+client.shields.register(shield_id=shield_id, provider_shield_id="llama-guard-basic")

 # Run content through shield
 response = client.safety.run_shield(
-    shield_id=shield_id,
-    messages=[{"role": "user", "content": "User message here"}]
+    shield_id=shield_id, messages=[{"role": "user", "content": "User message here"}]
 )

 if response.violation:
````
````diff
@@ -8,24 +8,16 @@ The telemetry system supports three main types of events:
 - **Unstructured Log Events**: Free-form log messages with severity levels
   ```python
   unstructured_log_event = UnstructuredLogEvent(
-      message="This is a log message",
-      severity=LogSeverity.INFO
+      message="This is a log message", severity=LogSeverity.INFO
   )
   ```
 - **Metric Events**: Numerical measurements with units
   ```python
-  metric_event = MetricEvent(
-      metric="my_metric",
-      value=10,
-      unit="count"
-  )
+  metric_event = MetricEvent(metric="my_metric", value=10, unit="count")
   ```
 - **Structured Log Events**: System events like span start/end. Extensible to add more structured log types.
   ```python
-  structured_log_event = SpanStartPayload(
-      name="my_span",
-      parent_span_id="parent_span_id"
-  )
+  structured_log_event = SpanStartPayload(name="my_span", parent_span_id="parent_span_id")
   ```

 ### Spans and Traces
````
````diff
@@ -35,7 +35,7 @@ Example client SDK call to register a "websearch" toolgroup that is provided by
 client.toolgroups.register(
     toolgroup_id="builtin::websearch",
     provider_id="brave-search",
-    args={"max_results": 5}
+    args={"max_results": 5},
 )
 ```

@@ -50,8 +50,7 @@ The Code Interpreter allows execution of Python code within a controlled environ
 ```python
 # Register Code Interpreter tool group
 client.toolgroups.register(
-    toolgroup_id="builtin::code_interpreter",
-    provider_id="code_interpreter"
+    toolgroup_id="builtin::code_interpreter", provider_id="code_interpreter"
 )
 ```

@@ -68,16 +67,14 @@ The WolframAlpha tool provides access to computational knowledge through the Wol
 ```python
 # Register WolframAlpha tool group
 client.toolgroups.register(
-    toolgroup_id="builtin::wolfram_alpha",
-    provider_id="wolfram-alpha"
+    toolgroup_id="builtin::wolfram_alpha", provider_id="wolfram-alpha"
 )
 ```

 Example usage:
 ```python
 result = client.tool_runtime.invoke_tool(
-    tool_name="wolfram_alpha",
-    args={"query": "solve x^2 + 2x + 1 = 0"}
+    tool_name="wolfram_alpha", args={"query": "solve x^2 + 2x + 1 = 0"}
 )
 ```

@@ -90,10 +87,7 @@ The Memory tool enables retrieval of context from various types of memory banks
 client.toolgroups.register(
     toolgroup_id="builtin::memory",
     provider_id="memory",
-    args={
-        "max_chunks": 5,
-        "max_tokens_in_context": 4096
-    }
+    args={"max_chunks": 5, "max_tokens_in_context": 4096},
 )
 ```

@@ -136,9 +130,7 @@ config = AgentConfig(
     toolgroups=[
         "builtin::websearch",
     ],
-    client_tools=[
-        ToolDef(name="client_tool", description="Client provided tool")
-    ]
+    client_tools=[ToolDef(name="client_tool", description="Client provided tool")],
 )
 ```

@@ -167,9 +159,9 @@ Example tool definition:
             "name": "query",
             "parameter_type": "string",
             "description": "The query to search for",
-            "required": True
+            "required": True,
         }
-    ]
+    ],
 }
 ```

@@ -179,8 +171,7 @@ Tools can be invoked using the `invoke_tool` method:

 ```python
 result = client.tool_runtime.invoke_tool(
-    tool_name="web_search",
-    kwargs={"query": "What is the capital of France?"}
+    tool_name="web_search", kwargs={"query": "What is the capital of France?"}
 )
 ```
````