mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-03 19:57:35 +00:00
precommit
This commit is contained in:
parent
2a54a2433f
commit
cbe7391574
6 changed files with 35 additions and 38 deletions
|
@ -1,7 +1,7 @@
|
||||||
---
|
---
|
||||||
description: "Agents
|
description: "Agents
|
||||||
|
|
||||||
APIs for creating and interacting with agentic systems."
|
APIs for creating and interacting with agentic systems."
|
||||||
sidebar_label: Agents
|
sidebar_label: Agents
|
||||||
title: Agents
|
title: Agents
|
||||||
---
|
---
|
||||||
|
@ -12,6 +12,6 @@ title: Agents
|
||||||
|
|
||||||
Agents
|
Agents
|
||||||
|
|
||||||
APIs for creating and interacting with agentic systems.
|
APIs for creating and interacting with agentic systems.
|
||||||
|
|
||||||
This section contains documentation for all available providers for the **agents** API.
|
This section contains documentation for all available providers for the **agents** API.
|
||||||
|
|
|
@ -1,14 +1,14 @@
|
||||||
---
|
---
|
||||||
description: "The Batches API enables efficient processing of multiple requests in a single operation,
|
description: "The Batches API enables efficient processing of multiple requests in a single operation,
|
||||||
particularly useful for processing large datasets, batch evaluation workflows, and
|
particularly useful for processing large datasets, batch evaluation workflows, and
|
||||||
cost-effective inference at scale.
|
cost-effective inference at scale.
|
||||||
|
|
||||||
The API is designed to allow use of openai client libraries for seamless integration.
|
The API is designed to allow use of openai client libraries for seamless integration.
|
||||||
|
|
||||||
This API provides the following extensions:
|
This API provides the following extensions:
|
||||||
- idempotent batch creation
|
- idempotent batch creation
|
||||||
|
|
||||||
Note: This API is currently under active development and may undergo changes."
|
Note: This API is currently under active development and may undergo changes."
|
||||||
sidebar_label: Batches
|
sidebar_label: Batches
|
||||||
title: Batches
|
title: Batches
|
||||||
---
|
---
|
||||||
|
@ -18,14 +18,14 @@ title: Batches
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
The Batches API enables efficient processing of multiple requests in a single operation,
|
The Batches API enables efficient processing of multiple requests in a single operation,
|
||||||
particularly useful for processing large datasets, batch evaluation workflows, and
|
particularly useful for processing large datasets, batch evaluation workflows, and
|
||||||
cost-effective inference at scale.
|
cost-effective inference at scale.
|
||||||
|
|
||||||
The API is designed to allow use of openai client libraries for seamless integration.
|
The API is designed to allow use of openai client libraries for seamless integration.
|
||||||
|
|
||||||
This API provides the following extensions:
|
This API provides the following extensions:
|
||||||
- idempotent batch creation
|
- idempotent batch creation
|
||||||
|
|
||||||
Note: This API is currently under active development and may undergo changes.
|
Note: This API is currently under active development and may undergo changes.
|
||||||
|
|
||||||
This section contains documentation for all available providers for the **batches** API.
|
This section contains documentation for all available providers for the **batches** API.
|
||||||
|
|
|
@ -1,9 +1,9 @@
|
||||||
---
|
---
|
||||||
description: "Llama Stack Inference API for generating completions, chat completions, and embeddings.
|
description: "Llama Stack Inference API for generating completions, chat completions, and embeddings.
|
||||||
|
|
||||||
This API provides the raw interface to the underlying models. Two kinds of models are supported:
|
This API provides the raw interface to the underlying models. Two kinds of models are supported:
|
||||||
- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.
|
- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.
|
||||||
- Embedding models: these models generate embeddings to be used for semantic search."
|
- Embedding models: these models generate embeddings to be used for semantic search."
|
||||||
sidebar_label: Inference
|
sidebar_label: Inference
|
||||||
title: Inference
|
title: Inference
|
||||||
---
|
---
|
||||||
|
@ -14,8 +14,8 @@ title: Inference
|
||||||
|
|
||||||
Llama Stack Inference API for generating completions, chat completions, and embeddings.
|
Llama Stack Inference API for generating completions, chat completions, and embeddings.
|
||||||
|
|
||||||
This API provides the raw interface to the underlying models. Two kinds of models are supported:
|
This API provides the raw interface to the underlying models. Two kinds of models are supported:
|
||||||
- LLM models: these models generate "raw" and "chat" (conversational) completions.
|
- LLM models: these models generate "raw" and "chat" (conversational) completions.
|
||||||
- Embedding models: these models generate embeddings to be used for semantic search.
|
- Embedding models: these models generate embeddings to be used for semantic search.
|
||||||
|
|
||||||
This section contains documentation for all available providers for the **inference** API.
|
This section contains documentation for all available providers for the **inference** API.
|
||||||
|
|
|
@ -819,7 +819,12 @@ class Agents(Protocol):
|
||||||
tools: list[OpenAIResponseInputTool] | None = None,
|
tools: list[OpenAIResponseInputTool] | None = None,
|
||||||
include: list[str] | None = None,
|
include: list[str] | None = None,
|
||||||
max_infer_iters: int | None = 10, # this is an extension to the OpenAI API
|
max_infer_iters: int | None = 10, # this is an extension to the OpenAI API
|
||||||
shields: Annotated[list[ResponseShield] | None, ExtraBodyField("List of shields to apply during response generation. Shields provide safety and content moderation.")] = None,
|
shields: Annotated[
|
||||||
|
list[ResponseShield] | None,
|
||||||
|
ExtraBodyField(
|
||||||
|
"List of shields to apply during response generation. Shields provide safety and content moderation."
|
||||||
|
),
|
||||||
|
] = None,
|
||||||
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
|
) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
|
||||||
"""Create a new OpenAI response.
|
"""Create a new OpenAI response.
|
||||||
|
|
||||||
|
|
|
@ -6,15 +6,12 @@
|
||||||
|
|
||||||
from collections.abc import Callable
|
from collections.abc import Callable
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Any, Generic, TypeVar
|
from typing import Any
|
||||||
|
|
||||||
from .strong_typing.schema import json_schema_type, register_schema # noqa: F401
|
from .strong_typing.schema import json_schema_type, register_schema # noqa: F401
|
||||||
|
|
||||||
|
|
||||||
T = TypeVar("T")
|
class ExtraBodyField[T]:
|
||||||
|
|
||||||
|
|
||||||
class ExtraBodyField(Generic[T]):
|
|
||||||
"""
|
"""
|
||||||
Marker annotation for parameters that arrive via extra_body in the client SDK.
|
Marker annotation for parameters that arrive via extra_body in the client SDK.
|
||||||
|
|
||||||
|
@ -30,7 +27,9 @@ class ExtraBodyField(Generic[T]):
|
||||||
self,
|
self,
|
||||||
input: str,
|
input: str,
|
||||||
model: str,
|
model: str,
|
||||||
shields: Annotated[list[str] | None, ExtraBodyField("List of shields to apply")] = None,
|
shields: Annotated[
|
||||||
|
list[str] | None, ExtraBodyField("List of shields to apply")
|
||||||
|
] = None,
|
||||||
) -> ResponseObject:
|
) -> ResponseObject:
|
||||||
# shields is available here with proper typing
|
# shields is available here with proper typing
|
||||||
if shields:
|
if shields:
|
||||||
|
@ -40,12 +39,11 @@ class ExtraBodyField(Generic[T]):
|
||||||
Client usage:
|
Client usage:
|
||||||
```python
|
```python
|
||||||
client.responses.create(
|
client.responses.create(
|
||||||
input="hello",
|
input="hello", model="llama-3", extra_body={"shields": ["shield-1"]}
|
||||||
model="llama-3",
|
|
||||||
extra_body={"shields": ["shield-1"]}
|
|
||||||
)
|
)
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, description: str | None = None):
|
def __init__(self, description: str | None = None):
|
||||||
self.description = description
|
self.description = description
|
||||||
|
|
||||||
|
|
|
@ -25,9 +25,7 @@ def test_shields_via_extra_body(compat_client, text_model_id):
|
||||||
model=text_model_id,
|
model=text_model_id,
|
||||||
input="What is the capital of France?",
|
input="What is the capital of France?",
|
||||||
stream=False,
|
stream=False,
|
||||||
extra_body={
|
extra_body={"shields": ["test-shield-1", "test-shield-2"]},
|
||||||
"shields": ["test-shield-1", "test-shield-2"]
|
|
||||||
}
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Verify the error message indicates shields are not implemented
|
# Verify the error message indicates shields are not implemented
|
||||||
|
@ -35,8 +33,6 @@ def test_shields_via_extra_body(compat_client, text_model_id):
|
||||||
assert "not yet implemented" in error_message.lower() or "not implemented" in error_message.lower()
|
assert "not yet implemented" in error_message.lower() or "not implemented" in error_message.lower()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def test_response_without_shields_still_works(compat_client, text_model_id):
|
def test_response_without_shields_still_works(compat_client, text_model_id):
|
||||||
"""Test that responses still work without shields parameter (backwards compatibility)."""
|
"""Test that responses still work without shields parameter (backwards compatibility)."""
|
||||||
|
|
||||||
|
@ -70,9 +66,7 @@ def test_shields_parameter_received_end_to_end(compat_client, text_model_id):
|
||||||
model=text_model_id,
|
model=text_model_id,
|
||||||
input="Test message for shields verification",
|
input="Test message for shields verification",
|
||||||
stream=False,
|
stream=False,
|
||||||
extra_body={
|
extra_body={"shields": ["shield-1", "shield-2"]},
|
||||||
"shields": ["shield-1", "shield-2"]
|
|
||||||
}
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# The NotImplementedError proves that:
|
# The NotImplementedError proves that:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue