Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-10-03 19:57:35 +00:00
Commit cbe7391574 (parent 2a54a2433f): precommit

6 changed files with 35 additions and 38 deletions
@@ -1,7 +1,7 @@
 ---
 description: "Agents
 
-APIs for creating and interacting with agentic systems."
+APIs for creating and interacting with agentic systems."
 sidebar_label: Agents
 title: Agents
 ---
@@ -12,6 +12,6 @@ title: Agents
 
 Agents
 
-APIs for creating and interacting with agentic systems.
+APIs for creating and interacting with agentic systems.
 
 This section contains documentation for all available providers for the **agents** API.

@@ -1,14 +1,14 @@
 ---
 description: "The Batches API enables efficient processing of multiple requests in a single operation,
-particularly useful for processing large datasets, batch evaluation workflows, and
-cost-effective inference at scale.
+particularly useful for processing large datasets, batch evaluation workflows, and
+cost-effective inference at scale.
 
-The API is designed to allow use of openai client libraries for seamless integration.
+The API is designed to allow use of openai client libraries for seamless integration.
 
-This API provides the following extensions:
-- idempotent batch creation
+This API provides the following extensions:
+- idempotent batch creation
 
-Note: This API is currently under active development and may undergo changes."
+Note: This API is currently under active development and may undergo changes."
 sidebar_label: Batches
 title: Batches
 ---
@@ -18,14 +18,14 @@ title: Batches
 ## Overview
 
 The Batches API enables efficient processing of multiple requests in a single operation,
-particularly useful for processing large datasets, batch evaluation workflows, and
-cost-effective inference at scale.
+particularly useful for processing large datasets, batch evaluation workflows, and
+cost-effective inference at scale.
 
-The API is designed to allow use of openai client libraries for seamless integration.
+The API is designed to allow use of openai client libraries for seamless integration.
 
-This API provides the following extensions:
-- idempotent batch creation
+This API provides the following extensions:
+- idempotent batch creation
 
-Note: This API is currently under active development and may undergo changes.
+Note: This API is currently under active development and may undergo changes.
 
 This section contains documentation for all available providers for the **batches** API.

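The frontmatter above states that the Batches API is meant to be driven through the OpenAI client libraries and adds idempotent batch creation as an extension. Below is a minimal sketch of what that usage could look like; the base URL, API key, input file id, and the idempotency handling are illustrative assumptions, not values taken from this commit.

```python
# Hedged sketch: driving the Batches API through the standard OpenAI Python client.
# base_url, api_key, and the file id are placeholders for a hypothetical Llama Stack deployment.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")  # assumed endpoint

# Create a batch over a previously uploaded JSONL file of chat completion requests.
batch = client.batches.create(
    input_file_id="file-abc123",  # placeholder id of an uploaded requests file
    endpoint="/v1/chat/completions",
    completion_window="24h",
)
print(batch.id, batch.status)

# The docs advertise idempotent batch creation as an extension; how the idempotency
# key is supplied is not shown in this diff, so it is deliberately omitted here.
```
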
@@ -1,9 +1,9 @@
 ---
 description: "Llama Stack Inference API for generating completions, chat completions, and embeddings.
 
-This API provides the raw interface to the underlying models. Two kinds of models are supported:
-- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.
-- Embedding models: these models generate embeddings to be used for semantic search."
+This API provides the raw interface to the underlying models. Two kinds of models are supported:
+- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.
+- Embedding models: these models generate embeddings to be used for semantic search."
 sidebar_label: Inference
 title: Inference
 ---
@@ -14,8 +14,8 @@ title: Inference
 
 Llama Stack Inference API for generating completions, chat completions, and embeddings.
 
-This API provides the raw interface to the underlying models. Two kinds of models are supported:
-- LLM models: these models generate "raw" and "chat" (conversational) completions.
-- Embedding models: these models generate embeddings to be used for semantic search.
+This API provides the raw interface to the underlying models. Two kinds of models are supported:
+- LLM models: these models generate "raw" and "chat" (conversational) completions.
+- Embedding models: these models generate embeddings to be used for semantic search.
 
 This section contains documentation for all available providers for the **inference** API.

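To make the two model kinds in the description concrete, here is a brief sketch using an OpenAI-compatible client; the base URL and model ids are placeholders and not part of this change.

```python
# Hedged sketch: one chat completion (LLM model) and one embeddings call (embedding model)
# against an OpenAI-compatible inference endpoint. base_url and model names are assumptions.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")  # assumed endpoint

# LLM model: a "chat" (conversational) completion.
chat = client.chat.completions.create(
    model="llama-3.2-3b-instruct",  # placeholder LLM model id
    messages=[{"role": "user", "content": "What is the capital of France?"}],
)
print(chat.choices[0].message.content)

# Embedding model: vectors intended for semantic search.
emb = client.embeddings.create(
    model="all-MiniLM-L6-v2",  # placeholder embedding model id
    input=["llama stack inference"],
)
print(len(emb.data[0].embedding))
```
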
@@ -819,7 +819,12 @@ class Agents(Protocol):
         tools: list[OpenAIResponseInputTool] | None = None,
         include: list[str] | None = None,
         max_infer_iters: int | None = 10,  # this is an extension to the OpenAI API
-        shields: Annotated[list[ResponseShield] | None, ExtraBodyField("List of shields to apply during response generation. Shields provide safety and content moderation.")] = None,
+        shields: Annotated[
+            list[ResponseShield] | None,
+            ExtraBodyField(
+                "List of shields to apply during response generation. Shields provide safety and content moderation."
+            ),
+        ] = None,
     ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
         """Create a new OpenAI response.
 

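The hunk above only re-wraps the shields parameter; the `Annotated[..., ExtraBodyField(...)]` metadata it carries is unchanged. As a rough illustration (not the repository's actual request-handling code), such markers can be read back at runtime with `get_type_hints(..., include_extras=True)` and the `__metadata__` attribute; the stand-in class and function below are assumptions made only to keep the snippet self-contained.

```python
# Hedged sketch: recovering ExtraBodyField metadata from an Annotated parameter at runtime.
# Requires Python 3.12+ for the `class C[T]:` syntax used by this commit.
from typing import Annotated, get_type_hints


class ExtraBodyField[T]:
    """Stand-in for the marker class this commit touches (assumed to have this shape)."""

    def __init__(self, description: str | None = None):
        self.description = description


async def create_openai_response(
    input: str,
    model: str,
    shields: Annotated[list[str] | None, ExtraBodyField("List of shields to apply")] = None,
) -> None: ...


# include_extras=True keeps the Annotated wrapper; __metadata__ exposes the marker objects.
hints = get_type_hints(create_openai_response, include_extras=True)
for name, hint in hints.items():
    for marker in getattr(hint, "__metadata__", ()):
        if isinstance(marker, ExtraBodyField):
            print(f"{name} is delivered via extra_body: {marker.description}")
```
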
@@ -6,15 +6,12 @@
 
 from collections.abc import Callable
 from dataclasses import dataclass
-from typing import Any, Generic, TypeVar
+from typing import Any
 
 from .strong_typing.schema import json_schema_type, register_schema  # noqa: F401
 
 
-T = TypeVar("T")
-
-
-class ExtraBodyField(Generic[T]):
+class ExtraBodyField[T]:
     """
     Marker annotation for parameters that arrive via extra_body in the client SDK.
 
@@ -30,7 +27,9 @@ class ExtraBodyField(Generic[T]):
             self,
             input: str,
             model: str,
-            shields: Annotated[list[str] | None, ExtraBodyField("List of shields to apply")] = None,
+            shields: Annotated[
+                list[str] | None, ExtraBodyField("List of shields to apply")
+            ] = None,
         ) -> ResponseObject:
             # shields is available here with proper typing
             if shields:
@@ -40,12 +39,11 @@ class ExtraBodyField(Generic[T]):
     Client usage:
     ```python
     client.responses.create(
-        input="hello",
-        model="llama-3",
-        extra_body={"shields": ["shield-1"]}
+        input="hello", model="llama-3", extra_body={"shields": ["shield-1"]}
     )
     ```
     """
 
     def __init__(self, description: str | None = None):
         self.description = description

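One change in the hunks above is the switch from the `TypeVar`/`Generic` spelling to `class ExtraBodyField[T]:`, the PEP 695 generic-class syntax available from Python 3.12. A minimal sketch of the equivalence follows; the `*Old`/`*New` class names are purely illustrative.

```python
# Pre-3.12 spelling: an explicit module-level TypeVar plus a Generic base class.
from typing import Generic, TypeVar

T = TypeVar("T")


class ExtraBodyFieldOld(Generic[T]):
    def __init__(self, description: str | None = None):
        self.description = description


# Python 3.12+ spelling (PEP 695): the type parameter is declared inline,
# so the module-level TypeVar and the Generic base class are no longer needed.
class ExtraBodyFieldNew[T]:
    def __init__(self, description: str | None = None):
        self.description = description


# Both forms support the same subscripted use in annotations, e.g. ExtraBodyFieldNew[str].
```
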
@@ -25,9 +25,7 @@ def test_shields_via_extra_body(compat_client, text_model_id):
         model=text_model_id,
         input="What is the capital of France?",
         stream=False,
-        extra_body={
-            "shields": ["test-shield-1", "test-shield-2"]
-        }
+        extra_body={"shields": ["test-shield-1", "test-shield-2"]},
     )
 
     # Verify the error message indicates shields are not implemented
@@ -35,8 +33,6 @@ def test_shields_via_extra_body(compat_client, text_model_id):
     assert "not yet implemented" in error_message.lower() or "not implemented" in error_message.lower()
 
 
-
-
 def test_response_without_shields_still_works(compat_client, text_model_id):
     """Test that responses still work without shields parameter (backwards compatibility)."""
 
@@ -70,9 +66,7 @@ def test_shields_parameter_received_end_to_end(compat_client, text_model_id):
         model=text_model_id,
         input="Test message for shields verification",
         stream=False,
-        extra_body={
-            "shields": ["shield-1", "shield-2"]
-        }
+        extra_body={"shields": ["shield-1", "shield-2"]},
     )
 
     # The NotImplementedError proves that: