forked from phoenix-oss/llama-stack-mirror
# What does this PR do? Change the Telemetry API to be able to support different use cases like returning traces for the UI and ability to export for Evals. Other changes: * Add a new trace_protocol decorator to decorate all our API methods so that any call to them will automatically get traced across all impls. * There is some issue with the decorator pattern of span creation when using async generators, where there are multiple yields with in the same context. I think its much more explicit by using the explicit context manager pattern using with. I moved the span creations in agent instance to be using with * Inject session id at the turn level, which should quickly give us all traces across turns for a given session Addresses #509 ## Test Plan ``` llama stack run /Users/dineshyv/.llama/distributions/llamastack-together/together-run.yaml PYTHONPATH=. python -m examples.agents.rag_with_memory_bank localhost 5000 curl -X POST 'http://localhost:5000/alpha/telemetry/query-traces' \ -H 'Content-Type: application/json' \ -d '{ "attribute_filters": [ { "key": "session_id", "op": "eq", "value": "dd667b87-ca4b-4d30-9265-5a0de318fc65" }], "limit": 100, "offset": 0, "order_by": ["start_time"] }' | jq . [ { "trace_id": "6902f54b83b4b48be18a6f422b13e16f", "root_span_id": "5f37b85543afc15a", "start_time": "2024-12-04T08:08:30.501587", "end_time": "2024-12-04T08:08:36.026463" }, { "trace_id": "92227dac84c0615ed741be393813fb5f", "root_span_id": "af7c5bb46665c2c8", "start_time": "2024-12-04T08:08:36.031170", "end_time": "2024-12-04T08:08:41.693301" }, { "trace_id": "7d578a6edac62f204ab479fba82f77b6", "root_span_id": "1d935e3362676896", "start_time": "2024-12-04T08:08:41.695204", "end_time": "2024-12-04T08:08:47.228016" }, { "trace_id": "dbd767d76991bc816f9f078907dc9ff2", "root_span_id": "f5a7ee76683b9602", "start_time": "2024-12-04T08:08:47.234578", "end_time": "2024-12-04T08:08:53.189412" } ] curl -X POST 'http://localhost:5000/alpha/telemetry/get-span-tree' \ -H 'Content-Type: application/json' \ -d '{ "span_id" : "6cceb4b48a156913", "max_depth": 2, "attributes_to_return": ["input"] }' | jq . % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 100 875 100 790 100 85 18462 1986 --:--:-- --:--:-- --:--:-- 20833 { "span_id": "6cceb4b48a156913", "trace_id": "dafa796f6aaf925f511c04cd7c67fdda", "parent_span_id": "892a66d726c7f990", "name": "retrieve_rag_context", "start_time": "2024-12-04T09:28:21.781995", "end_time": "2024-12-04T09:28:21.913352", "attributes": { "input": [ "{\"role\":\"system\",\"content\":\"You are a helpful assistant\"}", "{\"role\":\"user\",\"content\":\"What are the top 5 topics that were explained in the documentation? Only list succinct bullet points.\",\"context\":null}" ] }, "children": [ { "span_id": "1a2df181854064a8", "trace_id": "dafa796f6aaf925f511c04cd7c67fdda", "parent_span_id": "6cceb4b48a156913", "name": "MemoryRouter.query_documents", "start_time": "2024-12-04T09:28:21.787620", "end_time": "2024-12-04T09:28:21.906512", "attributes": { "input": null }, "children": [], "status": "ok" } ], "status": "ok" } ``` <img width="1677" alt="Screenshot 2024-12-04 at 9 42 56 AM" src="https://github.com/user-attachments/assets/4d3cea93-05ce-415a-93d9-4b1628631bf8">
352 lines
10 KiB
Python
352 lines
10 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
from typing import Any, AsyncGenerator, Dict, List, Optional
|
|
|
|
from llama_stack.apis.datasetio.datasetio import DatasetIO
|
|
from llama_stack.apis.memory_banks.memory_banks import BankParams
|
|
from llama_stack.distribution.datatypes import RoutingTable
|
|
from llama_stack.apis.memory import * # noqa: F403
|
|
from llama_stack.apis.inference import * # noqa: F403
|
|
from llama_stack.apis.safety import * # noqa: F403
|
|
from llama_stack.apis.datasetio import * # noqa: F403
|
|
from llama_stack.apis.scoring import * # noqa: F403
|
|
from llama_stack.apis.eval import * # noqa: F403
|
|
|
|
|
|
class MemoryRouter(Memory):
|
|
"""Routes to an provider based on the memory bank identifier"""
|
|
|
|
def __init__(
|
|
self,
|
|
routing_table: RoutingTable,
|
|
) -> None:
|
|
self.routing_table = routing_table
|
|
|
|
async def initialize(self) -> None:
|
|
pass
|
|
|
|
async def shutdown(self) -> None:
|
|
pass
|
|
|
|
async def register_memory_bank(
|
|
self,
|
|
memory_bank_id: str,
|
|
params: BankParams,
|
|
provider_id: Optional[str] = None,
|
|
provider_memorybank_id: Optional[str] = None,
|
|
) -> None:
|
|
await self.routing_table.register_memory_bank(
|
|
memory_bank_id,
|
|
params,
|
|
provider_id,
|
|
provider_memorybank_id,
|
|
)
|
|
|
|
async def insert_documents(
|
|
self,
|
|
bank_id: str,
|
|
documents: List[MemoryBankDocument],
|
|
ttl_seconds: Optional[int] = None,
|
|
) -> None:
|
|
return await self.routing_table.get_provider_impl(bank_id).insert_documents(
|
|
bank_id, documents, ttl_seconds
|
|
)
|
|
|
|
async def query_documents(
|
|
self,
|
|
bank_id: str,
|
|
query: InterleavedTextMedia,
|
|
params: Optional[Dict[str, Any]] = None,
|
|
) -> QueryDocumentsResponse:
|
|
return await self.routing_table.get_provider_impl(bank_id).query_documents(
|
|
bank_id, query, params
|
|
)
|
|
|
|
|
|
class InferenceRouter(Inference):
|
|
"""Routes to an provider based on the model"""
|
|
|
|
def __init__(
|
|
self,
|
|
routing_table: RoutingTable,
|
|
) -> None:
|
|
self.routing_table = routing_table
|
|
|
|
async def initialize(self) -> None:
|
|
pass
|
|
|
|
async def shutdown(self) -> None:
|
|
pass
|
|
|
|
async def register_model(
|
|
self,
|
|
model_id: str,
|
|
provider_model_id: Optional[str] = None,
|
|
provider_id: Optional[str] = None,
|
|
metadata: Optional[Dict[str, Any]] = None,
|
|
) -> None:
|
|
await self.routing_table.register_model(
|
|
model_id, provider_model_id, provider_id, metadata
|
|
)
|
|
|
|
async def chat_completion(
|
|
self,
|
|
model_id: str,
|
|
messages: List[Message],
|
|
sampling_params: Optional[SamplingParams] = SamplingParams(),
|
|
response_format: Optional[ResponseFormat] = None,
|
|
tools: Optional[List[ToolDefinition]] = None,
|
|
tool_choice: Optional[ToolChoice] = ToolChoice.auto,
|
|
tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json,
|
|
stream: Optional[bool] = False,
|
|
logprobs: Optional[LogProbConfig] = None,
|
|
) -> AsyncGenerator:
|
|
params = dict(
|
|
model_id=model_id,
|
|
messages=messages,
|
|
sampling_params=sampling_params,
|
|
tools=tools or [],
|
|
tool_choice=tool_choice,
|
|
tool_prompt_format=tool_prompt_format,
|
|
response_format=response_format,
|
|
stream=stream,
|
|
logprobs=logprobs,
|
|
)
|
|
provider = self.routing_table.get_provider_impl(model_id)
|
|
if stream:
|
|
return (chunk async for chunk in await provider.chat_completion(**params))
|
|
else:
|
|
return await provider.chat_completion(**params)
|
|
|
|
async def completion(
|
|
self,
|
|
model_id: str,
|
|
content: InterleavedTextMedia,
|
|
sampling_params: Optional[SamplingParams] = SamplingParams(),
|
|
response_format: Optional[ResponseFormat] = None,
|
|
stream: Optional[bool] = False,
|
|
logprobs: Optional[LogProbConfig] = None,
|
|
) -> AsyncGenerator:
|
|
provider = self.routing_table.get_provider_impl(model_id)
|
|
params = dict(
|
|
model_id=model_id,
|
|
content=content,
|
|
sampling_params=sampling_params,
|
|
response_format=response_format,
|
|
stream=stream,
|
|
logprobs=logprobs,
|
|
)
|
|
if stream:
|
|
return (chunk async for chunk in await provider.completion(**params))
|
|
else:
|
|
return await provider.completion(**params)
|
|
|
|
async def embeddings(
|
|
self,
|
|
model_id: str,
|
|
contents: List[InterleavedTextMedia],
|
|
) -> EmbeddingsResponse:
|
|
return await self.routing_table.get_provider_impl(model_id).embeddings(
|
|
model_id=model_id,
|
|
contents=contents,
|
|
)
|
|
|
|
|
|
class SafetyRouter(Safety):
|
|
def __init__(
|
|
self,
|
|
routing_table: RoutingTable,
|
|
) -> None:
|
|
self.routing_table = routing_table
|
|
|
|
async def initialize(self) -> None:
|
|
pass
|
|
|
|
async def shutdown(self) -> None:
|
|
pass
|
|
|
|
async def register_shield(
|
|
self,
|
|
shield_id: str,
|
|
provider_shield_id: Optional[str] = None,
|
|
provider_id: Optional[str] = None,
|
|
params: Optional[Dict[str, Any]] = None,
|
|
) -> Shield:
|
|
return await self.routing_table.register_shield(
|
|
shield_id, provider_shield_id, provider_id, params
|
|
)
|
|
|
|
async def run_shield(
|
|
self,
|
|
shield_id: str,
|
|
messages: List[Message],
|
|
params: Dict[str, Any] = None,
|
|
) -> RunShieldResponse:
|
|
return await self.routing_table.get_provider_impl(shield_id).run_shield(
|
|
shield_id=shield_id,
|
|
messages=messages,
|
|
params=params,
|
|
)
|
|
|
|
|
|
class DatasetIORouter(DatasetIO):
|
|
def __init__(
|
|
self,
|
|
routing_table: RoutingTable,
|
|
) -> None:
|
|
self.routing_table = routing_table
|
|
|
|
async def initialize(self) -> None:
|
|
pass
|
|
|
|
async def shutdown(self) -> None:
|
|
pass
|
|
|
|
async def get_rows_paginated(
|
|
self,
|
|
dataset_id: str,
|
|
rows_in_page: int,
|
|
page_token: Optional[str] = None,
|
|
filter_condition: Optional[str] = None,
|
|
) -> PaginatedRowsResult:
|
|
return await self.routing_table.get_provider_impl(
|
|
dataset_id
|
|
).get_rows_paginated(
|
|
dataset_id=dataset_id,
|
|
rows_in_page=rows_in_page,
|
|
page_token=page_token,
|
|
filter_condition=filter_condition,
|
|
)
|
|
|
|
async def append_rows(self, dataset_id: str, rows: List[Dict[str, Any]]) -> None:
|
|
return await self.routing_table.get_provider_impl(dataset_id).append_rows(
|
|
dataset_id=dataset_id,
|
|
rows=rows,
|
|
)
|
|
|
|
|
|
class ScoringRouter(Scoring):
|
|
def __init__(
|
|
self,
|
|
routing_table: RoutingTable,
|
|
) -> None:
|
|
self.routing_table = routing_table
|
|
|
|
async def initialize(self) -> None:
|
|
pass
|
|
|
|
async def shutdown(self) -> None:
|
|
pass
|
|
|
|
async def score_batch(
|
|
self,
|
|
dataset_id: str,
|
|
scoring_functions: Dict[str, Optional[ScoringFnParams]] = None,
|
|
save_results_dataset: bool = False,
|
|
) -> ScoreBatchResponse:
|
|
res = {}
|
|
for fn_identifier in scoring_functions.keys():
|
|
score_response = await self.routing_table.get_provider_impl(
|
|
fn_identifier
|
|
).score_batch(
|
|
dataset_id=dataset_id,
|
|
scoring_functions={fn_identifier: scoring_functions[fn_identifier]},
|
|
)
|
|
res.update(score_response.results)
|
|
|
|
if save_results_dataset:
|
|
raise NotImplementedError("Save results dataset not implemented yet")
|
|
|
|
return ScoreBatchResponse(
|
|
results=res,
|
|
)
|
|
|
|
async def score(
|
|
self,
|
|
input_rows: List[Dict[str, Any]],
|
|
scoring_functions: Dict[str, Optional[ScoringFnParams]] = None,
|
|
) -> ScoreResponse:
|
|
res = {}
|
|
# look up and map each scoring function to its provider impl
|
|
for fn_identifier in scoring_functions.keys():
|
|
score_response = await self.routing_table.get_provider_impl(
|
|
fn_identifier
|
|
).score(
|
|
input_rows=input_rows,
|
|
scoring_functions={fn_identifier: scoring_functions[fn_identifier]},
|
|
)
|
|
res.update(score_response.results)
|
|
|
|
return ScoreResponse(results=res)
|
|
|
|
|
|
class EvalRouter(Eval):
|
|
def __init__(
|
|
self,
|
|
routing_table: RoutingTable,
|
|
) -> None:
|
|
self.routing_table = routing_table
|
|
|
|
async def initialize(self) -> None:
|
|
pass
|
|
|
|
async def shutdown(self) -> None:
|
|
pass
|
|
|
|
async def run_eval(
|
|
self,
|
|
task_id: str,
|
|
task_config: AppEvalTaskConfig,
|
|
) -> Job:
|
|
return await self.routing_table.get_provider_impl(task_id).run_eval(
|
|
task_id=task_id,
|
|
task_config=task_config,
|
|
)
|
|
|
|
@webmethod(route="/eval/evaluate_rows", method="POST")
|
|
async def evaluate_rows(
|
|
self,
|
|
task_id: str,
|
|
input_rows: List[Dict[str, Any]],
|
|
scoring_functions: List[str],
|
|
task_config: EvalTaskConfig,
|
|
) -> EvaluateResponse:
|
|
return await self.routing_table.get_provider_impl(task_id).evaluate_rows(
|
|
task_id=task_id,
|
|
input_rows=input_rows,
|
|
scoring_functions=scoring_functions,
|
|
task_config=task_config,
|
|
)
|
|
|
|
async def job_status(
|
|
self,
|
|
task_id: str,
|
|
job_id: str,
|
|
) -> Optional[JobStatus]:
|
|
return await self.routing_table.get_provider_impl(task_id).job_status(
|
|
task_id, job_id
|
|
)
|
|
|
|
async def job_cancel(
|
|
self,
|
|
task_id: str,
|
|
job_id: str,
|
|
) -> None:
|
|
await self.routing_table.get_provider_impl(task_id).job_cancel(
|
|
task_id,
|
|
job_id,
|
|
)
|
|
|
|
async def job_result(
|
|
self,
|
|
task_id: str,
|
|
job_id: str,
|
|
) -> EvaluateResponse:
|
|
return await self.routing_table.get_provider_impl(task_id).job_result(
|
|
task_id,
|
|
job_id,
|
|
)
|