mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-08 23:01:04 +00:00
# What does this PR do? Inference router computes the token usage related metrics for all providers and returns the metrics as part of response and also logs to telemetry. ## Test Plan LLAMA_STACK_DISABLE_VERSION_CHECK=true llama stack run ~/.llama/distributions/fireworks/fireworks-run.yaml ``` curl --request POST \ --url http://localhost:8321/v1/inference/chat-completion \ --header 'content-type: application/json' \ --data '{ "model_id": "meta-llama/Llama-3.1-70B-Instruct", "messages": [ { "role": "user", "content": { "type": "text", "text": "where do humans live" } } ], "stream": false }' | jq . { "metrics": [ { "trace_id": "yjv1tf0jS1evOyPm", "span_id": "WqYKvg0_", "timestamp": "2025-02-27T18:55:10.770903Z", "attributes": { "model_id": "meta-llama/Llama-3.1-70B-Instruct", "provider_id": "fireworks" }, "type": "metric", "metric": "prompt_tokens", "value": 10, "unit": "tokens" }, { "trace_id": "yjv1tf0jS1evOyPm", "span_id": "WqYKvg0_", "timestamp": "2025-02-27T18:55:10.770916Z", "attributes": { "model_id": "meta-llama/Llama-3.1-70B-Instruct", "provider_id": "fireworks" }, "type": "metric", "metric": "completion_tokens", "value": 411, "unit": "tokens" }, { "trace_id": "yjv1tf0jS1evOyPm", "span_id": "WqYKvg0_", "timestamp": "2025-02-27T18:55:10.770919Z", "attributes": { "model_id": "meta-llama/Llama-3.1-70B-Instruct", "provider_id": "fireworks" }, "type": "metric", "metric": "total_tokens", "value": 421, "unit": "tokens" } ], "completion_message": { "role": "assistant", "content": "Humans live in various parts of the world, inhabiting almost every continent, country, and region. Here's a breakdown of where humans live:\n\n1. **Continents:** Humans inhabit all seven continents:\n\t* Africa\n\t* Antarctica (research stations only)\n\t* Asia\n\t* Australia\n\t* Europe\n\t* North America\n\t* South America\n2. **Countries:** There are 196 countries recognized by the United Nations, and humans live in almost all of them.\n3. 
**Regions:** Humans live in diverse regions, including:\n\t* Deserts (e.g., Sahara, Mojave)\n\t* Forests (e.g., Amazon, Congo)\n\t* Grasslands (e.g., Prairies, Steppes)\n\t* Mountains (e.g., Himalayas, Andes)\n\t* Oceans (e.g., coastal areas, islands)\n\t* Tundras (e.g., Arctic, sub-Arctic)\n4. **Cities and towns:** Many humans live in urban areas, such as cities and towns, which are often located near:\n\t* Coastlines\n\t* Rivers\n\t* Lakes\n\t* Mountains\n5. **Rural areas:** Some humans live in rural areas, such as:\n\t* Villages\n\t* Farms\n\t* Countryside\n6. **Islands:** Humans inhabit many islands, including:\n\t* Tropical islands (e.g., Hawaii, Maldives)\n\t* Arctic islands (e.g., Greenland, Iceland)\n\t* Continental islands (e.g., Great Britain, Ireland)\n7. **Extreme environments:** Humans also live in extreme environments, such as:\n\t* High-altitude areas (e.g., Tibet, Andes)\n\t* Low-altitude areas (e.g., Death Valley, Dead Sea)\n\t* Areas with extreme temperatures (e.g., Arctic, Sahara)\n\nOverall, humans have adapted to live in a wide range of environments and ecosystems around the world.", "stop_reason": "end_of_turn", "tool_calls": [] }, "logprobs": null } ``` ``` LLAMA_STACK_CONFIG=fireworks pytest -s -v tests/integration/inference ======================================================================== short test summary info ========================================================================= FAILED tests/integration/inference/test_text_inference.py::test_text_chat_completion_tool_calling_tools_not_in_request[txt=8B:vis=11B-inference:chat_completion:tool_calling_tools_absent-True] - ValueError: Unsupported tool prompt format: ToolPromptFormat.json FAILED tests/integration/inference/test_text_inference.py::test_text_chat_completion_tool_calling_tools_not_in_request[txt=8B:vis=11B-inference:chat_completion:tool_calling_tools_absent-False] - ValueError: Unsupported tool prompt format: ToolPromptFormat.json FAILED 
tests/integration/inference/test_vision_inference.py::test_image_chat_completion_non_streaming[txt=8B:vis=11B] - fireworks.client.error.InvalidRequestError: {'error': {'object': 'error', 'type': 'invalid_request_error', 'message': 'Failed to decode image cannot identify image f... FAILED tests/integration/inference/test_vision_inference.py::test_image_chat_completion_streaming[txt=8B:vis=11B] - fireworks.client.error.InvalidRequestError: {'error': {'object': 'error', 'type': 'invalid_request_error', 'message': 'Failed to decode image cannot identify image f... ========================================================= 4 failed, 16 passed, 23 xfailed, 17 warnings in 44.36s ========================================================= ```
81 lines
2.4 KiB
Python
81 lines
2.4 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
from typing import Any, Dict
|
|
|
|
from llama_stack.distribution.datatypes import RoutedProtocol
|
|
from llama_stack.distribution.store import DistributionRegistry
|
|
from llama_stack.providers.datatypes import Api, RoutingTable
|
|
|
|
from .routing_tables import (
|
|
BenchmarksRoutingTable,
|
|
DatasetsRoutingTable,
|
|
ModelsRoutingTable,
|
|
ScoringFunctionsRoutingTable,
|
|
ShieldsRoutingTable,
|
|
ToolGroupsRoutingTable,
|
|
VectorDBsRoutingTable,
|
|
)
|
|
|
|
|
|
async def get_routing_table_impl(
    api: Api,
    impls_by_provider_id: Dict[str, RoutedProtocol],
    _deps,
    dist_registry: DistributionRegistry,
) -> Any:
    """Build and initialize the routing table mapped to ``api``.

    The routing table class is selected by ``api.value``, constructed with
    ``impls_by_provider_id`` and ``dist_registry``, and returned after its
    ``initialize()`` coroutine has been awaited. ``_deps`` is accepted but
    not used by this function.

    Raises:
        ValueError: if ``api.value`` has no routing table class mapped to it.
    """
    table_classes = {
        "vector_dbs": VectorDBsRoutingTable,
        "models": ModelsRoutingTable,
        "shields": ShieldsRoutingTable,
        "datasets": DatasetsRoutingTable,
        "scoring_functions": ScoringFunctionsRoutingTable,
        "benchmarks": BenchmarksRoutingTable,
        "tool_groups": ToolGroupsRoutingTable,
    }

    api_name = api.value
    table_cls = table_classes.get(api_name)
    if table_cls is None:
        raise ValueError(f"API {api_name} not found in router map")

    routing_table = table_cls(impls_by_provider_id, dist_registry)
    await routing_table.initialize()
    return routing_table
|
|
|
|
|
|
async def get_auto_router_impl(api: Api, routing_table: RoutingTable, deps: Dict[str, Any]) -> Any:
    """Build and initialize the router mapped to ``api``.

    The router class is selected by ``api.value`` and constructed with
    ``routing_table`` as its positional argument. For APIs that list extra
    dependencies (currently only ``"inference"``, which lists ``telemetry``),
    each dependency present in ``deps`` is forwarded as a keyword argument;
    dependencies missing from ``deps`` are silently left out. The router is
    returned after its ``initialize()`` coroutine has been awaited.

    Raises:
        ValueError: if ``api.value`` has no router class mapped to it.
    """
    # Imported inside the function rather than at module level.
    from .routers import (
        DatasetIORouter,
        EvalRouter,
        InferenceRouter,
        SafetyRouter,
        ScoringRouter,
        ToolRuntimeRouter,
        VectorIORouter,
    )

    router_classes = {
        "vector_io": VectorIORouter,
        "inference": InferenceRouter,
        "safety": SafetyRouter,
        "datasetio": DatasetIORouter,
        "scoring": ScoringRouter,
        "eval": EvalRouter,
        "tool_runtime": ToolRuntimeRouter,
    }
    # Per-API extras: constructor keyword name -> key to look up in ``deps``.
    extra_deps_by_api = {
        "inference": {"telemetry": Api.telemetry},
    }

    api_name = api.value
    if api_name not in router_classes:
        raise ValueError(f"API {api_name} not found in router map")

    # Forward only the dependencies that were actually supplied.
    extra_kwargs = {
        kwarg_name: deps[dep_key]
        for kwarg_name, dep_key in extra_deps_by_api.get(api_name, {}).items()
        if dep_key in deps
    }

    router = router_classes[api_name](routing_table, **extra_kwargs)
    await router.initialize()
    return router
|