Fix precommit check after moving to ruff (#927)

The lint check on the main branch is failing. This fixes the lint check
after we moved to ruff in https://github.com/meta-llama/llama-stack/pull/921.
We need to move to a `ruff.toml` file, as well as fix and ignore some
additional checks.

Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>
This commit is contained in:
Yuan Tang 2025-02-02 09:46:45 -05:00 committed by GitHub
parent 4773092dd1
commit 34ab7a3b6c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
217 changed files with 981 additions and 2681 deletions

View file

@ -77,7 +77,7 @@ agent_config = AgentConfig(
instructions="You are a helpful assistant",
# Enable both RAG and tool usage
toolgroups=[
{"name": "builtin::rag", "args": {"vector_db_ids": ["my_docs"]}}.
{"name": "builtin::rag", "args": {"vector_db_ids": ["my_docs"]}},
"builtin::code_interpreter",
],
# Configure safety
@ -86,13 +86,9 @@ agent_config = AgentConfig(
# Control the inference loop
max_infer_iters=5,
sampling_params={
"strategy": {
"type": "top_p",
"temperature": 0.7,
"top_p": 0.95
},
"max_tokens": 2048
}
"strategy": {"type": "top_p", "temperature": 0.7, "top_p": 0.95},
"max_tokens": 2048,
},
)
agent = Agent(client, agent_config)
@ -101,11 +97,13 @@ session_id = agent.create_session("monitored_session")
# Stream the agent's execution steps
response = agent.create_turn(
messages=[{"role": "user", "content": "Analyze this code and run it"}],
attachments=[{
"content": "https://raw.githubusercontent.com/example/code.py",
"mime_type": "text/plain"
}],
session_id=session_id
attachments=[
{
"content": "https://raw.githubusercontent.com/example/code.py",
"mime_type": "text/plain",
}
],
session_id=session_id,
)
# Monitor each step of execution

View file

@ -15,6 +15,7 @@ This first example walks you through how to evaluate a model candidate served by
```python
import datasets
ds = datasets.load_dataset(path="llamastack/mmmu", name="Agriculture", split="dev")
ds = ds.select_columns(["chat_completion_input", "input_query", "expected_answer"])
eval_rows = ds.to_pandas().to_dict(orient="records")
@ -43,7 +44,7 @@ system_message = {
client.eval_tasks.register(
eval_task_id="meta-reference::mmmu",
dataset_id=f"mmmu-{subset}-{split}",
scoring_functions=["basic::regex_parser_multiple_choice_answer"]
scoring_functions=["basic::regex_parser_multiple_choice_answer"],
)
response = client.eval.evaluate_rows(
@ -62,9 +63,9 @@ response = client.eval.evaluate_rows(
"max_tokens": 4096,
"repeat_penalty": 1.0,
},
"system_message": system_message
}
}
"system_message": system_message,
},
},
)
```
@ -88,7 +89,7 @@ _ = client.datasets.register(
"input_query": {"type": "string"},
"expected_answer": {"type": "string"},
"chat_completion_input": {"type": "chat_completion_input"},
}
},
)
eval_rows = client.datasetio.get_rows_paginated(
@ -101,7 +102,7 @@ eval_rows = client.datasetio.get_rows_paginated(
client.eval_tasks.register(
eval_task_id="meta-reference::simpleqa",
dataset_id=simpleqa_dataset_id,
scoring_functions=["llm-as-judge::405b-simpleqa"]
scoring_functions=["llm-as-judge::405b-simpleqa"],
)
response = client.eval.evaluate_rows(
@ -120,8 +121,8 @@ response = client.eval.evaluate_rows(
"max_tokens": 4096,
"repeat_penalty": 1.0,
},
}
}
},
},
)
```
@ -144,14 +145,14 @@ agent_config = {
{
"type": "brave_search",
"engine": "tavily",
"api_key": userdata.get("TAVILY_SEARCH_API_KEY")
"api_key": userdata.get("TAVILY_SEARCH_API_KEY"),
}
],
"tool_choice": "auto",
"tool_prompt_format": "json",
"input_shields": [],
"output_shields": [],
"enable_session_persistence": False
"enable_session_persistence": False,
}
response = client.eval.evaluate_rows(
@ -163,7 +164,7 @@ response = client.eval.evaluate_rows(
"eval_candidate": {
"type": "agent",
"config": agent_config,
}
}
},
},
)
```

View file

@ -13,7 +13,7 @@ Here's how to set up basic evaluation:
response = client.eval_tasks.register(
eval_task_id="my_eval",
dataset_id="my_dataset",
scoring_functions=["accuracy", "relevance"]
scoring_functions=["accuracy", "relevance"],
)
# Run evaluation
@ -21,16 +21,10 @@ job = client.eval.run_eval(
task_id="my_eval",
task_config={
"type": "app",
"eval_candidate": {
"type": "agent",
"config": agent_config
}
}
"eval_candidate": {"type": "agent", "config": agent_config},
},
)
# Get results
result = client.eval.job_result(
task_id="my_eval",
job_id=job.job_id
)
result = client.eval.job_result(task_id="my_eval", job_id=job.job_id)
```

View file

@ -34,15 +34,16 @@ chunks = [
{
"document_id": "doc1",
"content": "Your document text here",
"mime_type": "text/plain"
"mime_type": "text/plain",
},
...
...,
]
client.vector_io.insert(vector_db_id, chunks)
# You can then query for these chunks
chunks_response = client.vector_io.query(vector_db_id, query="What do you know about...")
chunks_response = client.vector_io.query(
vector_db_id, query="What do you know about..."
)
```
### Using the RAG Tool
@ -81,7 +82,6 @@ results = client.tool_runtime.rag_tool.query(
One of the most powerful patterns is combining agents with RAG capabilities. Here's a complete example:
```python
# Configure agent with memory
agent_config = AgentConfig(
model="Llama3.2-3B-Instruct",
@ -91,9 +91,9 @@ agent_config = AgentConfig(
"name": "builtin::rag",
"args": {
"vector_db_ids": [vector_db_id],
}
},
}
]
],
)
agent = Agent(client, agent_config)
@ -101,25 +101,21 @@ session_id = agent.create_session("rag_session")
# Initial document ingestion
response = agent.create_turn(
messages=[{
"role": "user",
"content": "I am providing some documents for reference."
}],
messages=[
{"role": "user", "content": "I am providing some documents for reference."}
],
documents=[
dict(
content="https://raw.githubusercontent.com/example/doc.rst",
mime_type="text/plain"
mime_type="text/plain",
)
],
session_id=session_id
session_id=session_id,
)
# Query with RAG
response = agent.create_turn(
messages=[{
"role": "user",
"content": "What are the key topics in the documents?"
}],
session_id=session_id
messages=[{"role": "user", "content": "What are the key topics in the documents?"}],
session_id=session_id,
)
```

View file

@ -5,15 +5,11 @@ Safety is a critical component of any AI application. Llama Stack provides a Shi
```python
# Register a safety shield
shield_id = "content_safety"
client.shields.register(
shield_id=shield_id,
provider_shield_id="llama-guard-basic"
)
client.shields.register(shield_id=shield_id, provider_shield_id="llama-guard-basic")
# Run content through shield
response = client.safety.run_shield(
shield_id=shield_id,
messages=[{"role": "user", "content": "User message here"}]
shield_id=shield_id, messages=[{"role": "user", "content": "User message here"}]
)
if response.violation:

View file

@ -8,24 +8,16 @@ The telemetry system supports three main types of events:
- **Unstructured Log Events**: Free-form log messages with severity levels
```python
unstructured_log_event = UnstructuredLogEvent(
message="This is a log message",
severity=LogSeverity.INFO
message="This is a log message", severity=LogSeverity.INFO
)
```
- **Metric Events**: Numerical measurements with units
```python
metric_event = MetricEvent(
metric="my_metric",
value=10,
unit="count"
)
metric_event = MetricEvent(metric="my_metric", value=10, unit="count")
```
- **Structured Log Events**: System events like span start/end. Extensible to add more structured log types.
```python
structured_log_event = SpanStartPayload(
name="my_span",
parent_span_id="parent_span_id"
)
structured_log_event = SpanStartPayload(name="my_span", parent_span_id="parent_span_id")
```
### Spans and Traces

View file

@ -35,7 +35,7 @@ Example client SDK call to register a "websearch" toolgroup that is provided by
client.toolgroups.register(
toolgroup_id="builtin::websearch",
provider_id="brave-search",
args={"max_results": 5}
args={"max_results": 5},
)
```
@ -50,8 +50,7 @@ The Code Interpreter allows execution of Python code within a controlled environ
```python
# Register Code Interpreter tool group
client.toolgroups.register(
toolgroup_id="builtin::code_interpreter",
provider_id="code_interpreter"
toolgroup_id="builtin::code_interpreter", provider_id="code_interpreter"
)
```
@ -68,16 +67,14 @@ The WolframAlpha tool provides access to computational knowledge through the Wol
```python
# Register WolframAlpha tool group
client.toolgroups.register(
toolgroup_id="builtin::wolfram_alpha",
provider_id="wolfram-alpha"
toolgroup_id="builtin::wolfram_alpha", provider_id="wolfram-alpha"
)
```
Example usage:
```python
result = client.tool_runtime.invoke_tool(
tool_name="wolfram_alpha",
args={"query": "solve x^2 + 2x + 1 = 0"}
tool_name="wolfram_alpha", args={"query": "solve x^2 + 2x + 1 = 0"}
)
```
@ -90,10 +87,7 @@ The Memory tool enables retrieval of context from various types of memory banks
client.toolgroups.register(
toolgroup_id="builtin::memory",
provider_id="memory",
args={
"max_chunks": 5,
"max_tokens_in_context": 4096
}
args={"max_chunks": 5, "max_tokens_in_context": 4096},
)
```
@ -136,9 +130,7 @@ config = AgentConfig(
toolgroups=[
"builtin::websearch",
],
client_tools=[
ToolDef(name="client_tool", description="Client provided tool")
]
client_tools=[ToolDef(name="client_tool", description="Client provided tool")],
)
```
@ -167,9 +159,9 @@ Example tool definition:
"name": "query",
"parameter_type": "string",
"description": "The query to search for",
"required": True
"required": True,
}
]
],
}
```
@ -179,8 +171,7 @@ Tools can be invoked using the `invoke_tool` method:
```python
result = client.tool_runtime.invoke_tool(
tool_name="web_search",
kwargs={"query": "What is the capital of France?"}
tool_name="web_search", kwargs={"query": "What is the capital of France?"}
)
```