forked from phoenix-oss/llama-stack-mirror
Merge branch 'main' into pr1573
This commit is contained in:
commit
8942071b3b
10 changed files with 171 additions and 123 deletions
2
.github/workflows/unit-tests.yml
vendored
2
.github/workflows/unit-tests.yml
vendored
|
@ -1,6 +1,8 @@
|
|||
name: Unit Tests
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ main ]
|
||||
pull_request:
|
||||
branches: [ main ]
|
||||
workflow_dispatch:
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
[](https://pypi.org/project/llama-stack/)
|
||||
[](https://github.com/meta-llama/llama-stack/blob/main/LICENSE)
|
||||
[](https://discord.gg/llama-stack)
|
||||

|
||||
|
||||
[**Quick Start**](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) | [**Documentation**](https://llama-stack.readthedocs.io/en/latest/index.html) | [**Colab Notebook**](./docs/getting_started.ipynb)
|
||||
|
||||
|
|
153
docs/_static/llama-stack-spec.html
vendored
153
docs/_static/llama-stack-spec.html
vendored
|
@ -4556,7 +4556,7 @@
|
|||
"metrics": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/MetricEvent"
|
||||
"$ref": "#/components/schemas/MetricInResponse"
|
||||
}
|
||||
},
|
||||
"completion_message": {
|
||||
|
@ -4578,46 +4578,9 @@
|
|||
"title": "ChatCompletionResponse",
|
||||
"description": "Response from a chat completion request."
|
||||
},
|
||||
"MetricEvent": {
|
||||
"MetricInResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"trace_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"span_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"timestamp": {
|
||||
"type": "string",
|
||||
"format": "date-time"
|
||||
},
|
||||
"attributes": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "integer"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "metric",
|
||||
"default": "metric"
|
||||
},
|
||||
"metric": {
|
||||
"type": "string"
|
||||
},
|
||||
|
@ -4637,15 +4600,10 @@
|
|||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"trace_id",
|
||||
"span_id",
|
||||
"timestamp",
|
||||
"type",
|
||||
"metric",
|
||||
"value",
|
||||
"unit"
|
||||
"value"
|
||||
],
|
||||
"title": "MetricEvent"
|
||||
"title": "MetricInResponse"
|
||||
},
|
||||
"TokenLogProbs": {
|
||||
"type": "object",
|
||||
|
@ -4722,6 +4680,12 @@
|
|||
"CompletionResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"metrics": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/MetricInResponse"
|
||||
}
|
||||
},
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "The generated completion text"
|
||||
|
@ -4931,7 +4895,7 @@
|
|||
"metrics": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/MetricEvent"
|
||||
"$ref": "#/components/schemas/MetricInResponse"
|
||||
}
|
||||
},
|
||||
"event": {
|
||||
|
@ -5089,6 +5053,12 @@
|
|||
"CompletionResponseStreamChunk": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"metrics": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/MetricInResponse"
|
||||
}
|
||||
},
|
||||
"delta": {
|
||||
"type": "string",
|
||||
"description": "New content generated since last chunk. This can be one or more tokens."
|
||||
|
@ -8501,6 +8471,75 @@
|
|||
],
|
||||
"title": "LogSeverity"
|
||||
},
|
||||
"MetricEvent": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"trace_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"span_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"timestamp": {
|
||||
"type": "string",
|
||||
"format": "date-time"
|
||||
},
|
||||
"attributes": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "integer"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "null"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "metric",
|
||||
"default": "metric"
|
||||
},
|
||||
"metric": {
|
||||
"type": "string"
|
||||
},
|
||||
"value": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "integer"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"trace_id",
|
||||
"span_id",
|
||||
"timestamp",
|
||||
"type",
|
||||
"metric",
|
||||
"value",
|
||||
"unit"
|
||||
],
|
||||
"title": "MetricEvent"
|
||||
},
|
||||
"SpanEndPayload": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
@ -9625,21 +9664,11 @@
|
|||
"type": "object",
|
||||
"properties": {
|
||||
"tool_responses": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/ToolResponse"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/ToolResponseMessage"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "The tool call responses to resume the turn with. NOTE: ToolResponseMessage will be deprecated. Use ToolResponse."
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/ToolResponse"
|
||||
},
|
||||
"description": "The tool call responses to resume the turn with."
|
||||
},
|
||||
"stream": {
|
||||
"type": "boolean",
|
||||
|
|
95
docs/_static/llama-stack-spec.yaml
vendored
95
docs/_static/llama-stack-spec.yaml
vendored
|
@ -3105,7 +3105,7 @@ components:
|
|||
metrics:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricEvent'
|
||||
$ref: '#/components/schemas/MetricInResponse'
|
||||
completion_message:
|
||||
$ref: '#/components/schemas/CompletionMessage'
|
||||
description: The complete response message
|
||||
|
@ -3120,29 +3120,9 @@ components:
|
|||
- completion_message
|
||||
title: ChatCompletionResponse
|
||||
description: Response from a chat completion request.
|
||||
MetricEvent:
|
||||
MetricInResponse:
|
||||
type: object
|
||||
properties:
|
||||
trace_id:
|
||||
type: string
|
||||
span_id:
|
||||
type: string
|
||||
timestamp:
|
||||
type: string
|
||||
format: date-time
|
||||
attributes:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
type:
|
||||
type: string
|
||||
const: metric
|
||||
default: metric
|
||||
metric:
|
||||
type: string
|
||||
value:
|
||||
|
@ -3153,14 +3133,9 @@ components:
|
|||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- type
|
||||
- metric
|
||||
- value
|
||||
- unit
|
||||
title: MetricEvent
|
||||
title: MetricInResponse
|
||||
TokenLogProbs:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -3217,6 +3192,10 @@ components:
|
|||
CompletionResponse:
|
||||
type: object
|
||||
properties:
|
||||
metrics:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricInResponse'
|
||||
content:
|
||||
type: string
|
||||
description: The generated completion text
|
||||
|
@ -3416,7 +3395,7 @@ components:
|
|||
metrics:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricEvent'
|
||||
$ref: '#/components/schemas/MetricInResponse'
|
||||
event:
|
||||
$ref: '#/components/schemas/ChatCompletionResponseEvent'
|
||||
description: The event containing the new content
|
||||
|
@ -3535,6 +3514,10 @@ components:
|
|||
CompletionResponseStreamChunk:
|
||||
type: object
|
||||
properties:
|
||||
metrics:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/MetricInResponse'
|
||||
delta:
|
||||
type: string
|
||||
description: >-
|
||||
|
@ -5784,6 +5767,47 @@ components:
|
|||
- error
|
||||
- critical
|
||||
title: LogSeverity
|
||||
MetricEvent:
|
||||
type: object
|
||||
properties:
|
||||
trace_id:
|
||||
type: string
|
||||
span_id:
|
||||
type: string
|
||||
timestamp:
|
||||
type: string
|
||||
format: date-time
|
||||
attributes:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: integer
|
||||
- type: number
|
||||
- type: boolean
|
||||
- type: 'null'
|
||||
type:
|
||||
type: string
|
||||
const: metric
|
||||
default: metric
|
||||
metric:
|
||||
type: string
|
||||
value:
|
||||
oneOf:
|
||||
- type: integer
|
||||
- type: number
|
||||
unit:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- trace_id
|
||||
- span_id
|
||||
- timestamp
|
||||
- type
|
||||
- metric
|
||||
- value
|
||||
- unit
|
||||
title: MetricEvent
|
||||
SpanEndPayload:
|
||||
type: object
|
||||
properties:
|
||||
|
@ -6495,16 +6519,11 @@ components:
|
|||
type: object
|
||||
properties:
|
||||
tool_responses:
|
||||
oneOf:
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ToolResponse'
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ToolResponseMessage'
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ToolResponse'
|
||||
description: >-
|
||||
The tool call responses to resume the turn with. NOTE: ToolResponseMessage
|
||||
will be deprecated. Use ToolResponse.
|
||||
The tool call responses to resume the turn with.
|
||||
stream:
|
||||
type: boolean
|
||||
description: Whether to stream the response.
|
||||
|
|
|
@ -370,7 +370,7 @@ class AgentTurnResumeRequest(BaseModel):
|
|||
agent_id: str
|
||||
session_id: str
|
||||
turn_id: str
|
||||
tool_responses: Union[List[ToolResponse], List[ToolResponseMessage]]
|
||||
tool_responses: List[ToolResponse]
|
||||
stream: Optional[bool] = False
|
||||
|
||||
|
||||
|
@ -449,7 +449,7 @@ class Agents(Protocol):
|
|||
agent_id: str,
|
||||
session_id: str,
|
||||
turn_id: str,
|
||||
tool_responses: Union[List[ToolResponse], List[ToolResponseMessage]],
|
||||
tool_responses: List[ToolResponse],
|
||||
stream: Optional[bool] = False,
|
||||
) -> Union[Turn, AsyncIterator[AgentTurnResponseStreamChunk]]:
|
||||
"""Resume an agent turn with executed tool call responses.
|
||||
|
@ -460,7 +460,6 @@ class Agents(Protocol):
|
|||
:param session_id: The ID of the session to resume.
|
||||
:param turn_id: The ID of the turn to resume.
|
||||
:param tool_responses: The tool call responses to resume the turn with.
|
||||
NOTE: ToolResponseMessage will be deprecated. Use ToolResponse.
|
||||
:param stream: Whether to stream the response.
|
||||
:returns: A Turn object if stream is False, otherwise an AsyncIterator of AgentTurnResponseStreamChunk objects.
|
||||
"""
|
||||
|
|
|
@ -96,6 +96,13 @@ class MetricEvent(EventCommon):
|
|||
unit: str
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class MetricInResponse(BaseModel):
|
||||
metric: str
|
||||
value: Union[int, float]
|
||||
unit: Optional[str] = None
|
||||
|
||||
|
||||
# This is a short term solution to allow inference API to return metrics
|
||||
# The ideal way to do this is to have a way for all response types to include metrics
|
||||
# and all metric events logged to the telemetry API to be inlcuded with the response
|
||||
|
@ -117,7 +124,7 @@ class MetricEvent(EventCommon):
|
|||
|
||||
|
||||
class MetricResponseMixin(BaseModel):
|
||||
metrics: Optional[List[MetricEvent]] = None
|
||||
metrics: Optional[List[MetricInResponse]] = None
|
||||
|
||||
|
||||
@json_schema_type
|
||||
|
|
|
@ -48,7 +48,7 @@ from llama_stack.apis.scoring import (
|
|||
ScoringFnParams,
|
||||
)
|
||||
from llama_stack.apis.shields import Shield
|
||||
from llama_stack.apis.telemetry import MetricEvent, Telemetry
|
||||
from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry
|
||||
from llama_stack.apis.tools import (
|
||||
RAGDocument,
|
||||
RAGQueryConfig,
|
||||
|
@ -206,12 +206,12 @@ class InferenceRouter(Inference):
|
|||
completion_tokens: int,
|
||||
total_tokens: int,
|
||||
model: Model,
|
||||
) -> List[MetricEvent]:
|
||||
) -> List[MetricInResponse]:
|
||||
metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model)
|
||||
if self.telemetry:
|
||||
for metric in metrics:
|
||||
await self.telemetry.log_event(metric)
|
||||
return metrics
|
||||
return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics]
|
||||
|
||||
async def _count_tokens(
|
||||
self,
|
||||
|
@ -238,7 +238,6 @@ class InferenceRouter(Inference):
|
|||
tool_config: Optional[ToolConfig] = None,
|
||||
) -> Union[ChatCompletionResponse, AsyncIterator[ChatCompletionResponseStreamChunk]]:
|
||||
logger.debug(
|
||||
"core",
|
||||
f"InferenceRouter.chat_completion: {model_id=}, {stream=}, {messages=}, {tools=}, {tool_config=}, {response_format=}",
|
||||
)
|
||||
if sampling_params is None:
|
||||
|
|
|
@ -19,7 +19,7 @@ def preserve_contexts_async_generator(
|
|||
and we need to preserve the context across the event loop boundary.
|
||||
"""
|
||||
|
||||
async def wrapper():
|
||||
async def wrapper() -> AsyncGenerator[T, None]:
|
||||
while True:
|
||||
try:
|
||||
item = await gen.__anext__()
|
||||
|
|
|
@ -218,18 +218,10 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
steps = []
|
||||
messages = await self.get_messages_from_turns(turns)
|
||||
if is_resume:
|
||||
if isinstance(request.tool_responses[0], ToolResponseMessage):
|
||||
tool_response_messages = request.tool_responses
|
||||
tool_responses = [
|
||||
ToolResponse(call_id=x.call_id, tool_name=x.tool_name, content=x.content)
|
||||
for x in request.tool_responses
|
||||
]
|
||||
else:
|
||||
tool_response_messages = [
|
||||
ToolResponseMessage(call_id=x.call_id, tool_name=x.tool_name, content=x.content)
|
||||
for x in request.tool_responses
|
||||
]
|
||||
tool_responses = request.tool_responses
|
||||
tool_response_messages = [
|
||||
ToolResponseMessage(call_id=x.call_id, tool_name=x.tool_name, content=x.content)
|
||||
for x in request.tool_responses
|
||||
]
|
||||
messages.extend(tool_response_messages)
|
||||
last_turn = turns[-1]
|
||||
last_turn_messages = self.turn_to_messages(last_turn)
|
||||
|
@ -252,7 +244,7 @@ class ChatAgent(ShieldRunnerMixin):
|
|||
step_id=(in_progress_tool_call_step.step_id if in_progress_tool_call_step else str(uuid.uuid4())),
|
||||
turn_id=request.turn_id,
|
||||
tool_calls=(in_progress_tool_call_step.tool_calls if in_progress_tool_call_step else []),
|
||||
tool_responses=tool_responses,
|
||||
tool_responses=request.tool_responses,
|
||||
completed_at=now,
|
||||
started_at=(in_progress_tool_call_step.started_at if in_progress_tool_call_step else now),
|
||||
)
|
||||
|
|
|
@ -172,7 +172,7 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
agent_id: str,
|
||||
session_id: str,
|
||||
turn_id: str,
|
||||
tool_responses: Union[List[ToolResponse], List[ToolResponseMessage]],
|
||||
tool_responses: List[ToolResponse],
|
||||
stream: Optional[bool] = False,
|
||||
) -> AsyncGenerator:
|
||||
request = AgentTurnResumeRequest(
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue