forked from phoenix-oss/llama-stack-mirror
		
	# What does this PR do? Inference router computes the token usage related metrics for all providers and returns the metrics as part of response and also logs to telemetry. ## Test Plan LLAMA_STACK_DISABLE_VERSION_CHECK=true llama stack run ~/.llama/distributions/fireworks/fireworks-run.yaml ``` curl --request POST \ --url http://localhost:8321/v1/inference/chat-completion \ --header 'content-type: application/json' \ --data '{ "model_id": "meta-llama/Llama-3.1-70B-Instruct", "messages": [ { "role": "user", "content": { "type": "text", "text": "where do humans live" } } ], "stream": false }' | jq . { "metrics": [ { "trace_id": "yjv1tf0jS1evOyPm", "span_id": "WqYKvg0_", "timestamp": "2025-02-27T18:55:10.770903Z", "attributes": { "model_id": "meta-llama/Llama-3.1-70B-Instruct", "provider_id": "fireworks" }, "type": "metric", "metric": "prompt_tokens", "value": 10, "unit": "tokens" }, { "trace_id": "yjv1tf0jS1evOyPm", "span_id": "WqYKvg0_", "timestamp": "2025-02-27T18:55:10.770916Z", "attributes": { "model_id": "meta-llama/Llama-3.1-70B-Instruct", "provider_id": "fireworks" }, "type": "metric", "metric": "completion_tokens", "value": 411, "unit": "tokens" }, { "trace_id": "yjv1tf0jS1evOyPm", "span_id": "WqYKvg0_", "timestamp": "2025-02-27T18:55:10.770919Z", "attributes": { "model_id": "meta-llama/Llama-3.1-70B-Instruct", "provider_id": "fireworks" }, "type": "metric", "metric": "total_tokens", "value": 421, "unit": "tokens" } ], "completion_message": { "role": "assistant", "content": "Humans live in various parts of the world, inhabiting almost every continent, country, and region. Here's a breakdown of where humans live:\n\n1. **Continents:** Humans inhabit all seven continents:\n\t* Africa\n\t* Antarctica (research stations only)\n\t* Asia\n\t* Australia\n\t* Europe\n\t* North America\n\t* South America\n2. **Countries:** There are 196 countries recognized by the United Nations, and humans live in almost all of them.\n3. 
**Regions:** Humans live in diverse regions, including:\n\t* Deserts (e.g., Sahara, Mojave)\n\t* Forests (e.g., Amazon, Congo)\n\t* Grasslands (e.g., Prairies, Steppes)\n\t* Mountains (e.g., Himalayas, Andes)\n\t* Oceans (e.g., coastal areas, islands)\n\t* Tundras (e.g., Arctic, sub-Arctic)\n4. **Cities and towns:** Many humans live in urban areas, such as cities and towns, which are often located near:\n\t* Coastlines\n\t* Rivers\n\t* Lakes\n\t* Mountains\n5. **Rural areas:** Some humans live in rural areas, such as:\n\t* Villages\n\t* Farms\n\t* Countryside\n6. **Islands:** Humans inhabit many islands, including:\n\t* Tropical islands (e.g., Hawaii, Maldives)\n\t* Arctic islands (e.g., Greenland, Iceland)\n\t* Continental islands (e.g., Great Britain, Ireland)\n7. **Extreme environments:** Humans also live in extreme environments, such as:\n\t* High-altitude areas (e.g., Tibet, Andes)\n\t* Low-altitude areas (e.g., Death Valley, Dead Sea)\n\t* Areas with extreme temperatures (e.g., Arctic, Sahara)\n\nOverall, humans have adapted to live in a wide range of environments and ecosystems around the world.", "stop_reason": "end_of_turn", "tool_calls": [] }, "logprobs": null } ``` ``` LLAMA_STACK_CONFIG=fireworks pytest -s -v tests/integration/inference ======================================================================== short test summary info ========================================================================= FAILED tests/integration/inference/test_text_inference.py::test_text_chat_completion_tool_calling_tools_not_in_request[txt=8B:vis=11B-inference:chat_completion:tool_calling_tools_absent-True] - ValueError: Unsupported tool prompt format: ToolPromptFormat.json FAILED tests/integration/inference/test_text_inference.py::test_text_chat_completion_tool_calling_tools_not_in_request[txt=8B:vis=11B-inference:chat_completion:tool_calling_tools_absent-False] - ValueError: Unsupported tool prompt format: ToolPromptFormat.json FAILED 
tests/integration/inference/test_vision_inference.py::test_image_chat_completion_non_streaming[txt=8B:vis=11B] - fireworks.client.error.InvalidRequestError: {'error': {'object': 'error', 'type': 'invalid_request_error', 'message': 'Failed to decode image cannot identify image f... FAILED tests/integration/inference/test_vision_inference.py::test_image_chat_completion_streaming[txt=8B:vis=11B] - fireworks.client.error.InvalidRequestError: {'error': {'object': 'error', 'type': 'invalid_request_error', 'message': 'Failed to decode image cannot identify image f... ========================================================= 4 failed, 16 passed, 23 xfailed, 17 warnings in 44.36s ========================================================= ```
		
			
				
	
	
		
			81 lines
		
	
	
	
		
			2.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			81 lines
		
	
	
	
		
			2.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # Copyright (c) Meta Platforms, Inc. and affiliates.
 | |
| # All rights reserved.
 | |
| #
 | |
| # This source code is licensed under the terms described in the LICENSE file in
 | |
| # the root directory of this source tree.
 | |
| 
 | |
| from typing import Any, Dict
 | |
| 
 | |
| from llama_stack.distribution.datatypes import RoutedProtocol
 | |
| from llama_stack.distribution.store import DistributionRegistry
 | |
| from llama_stack.providers.datatypes import Api, RoutingTable
 | |
| 
 | |
| from .routing_tables import (
 | |
|     BenchmarksRoutingTable,
 | |
|     DatasetsRoutingTable,
 | |
|     ModelsRoutingTable,
 | |
|     ScoringFunctionsRoutingTable,
 | |
|     ShieldsRoutingTable,
 | |
|     ToolGroupsRoutingTable,
 | |
|     VectorDBsRoutingTable,
 | |
| )
 | |
| 
 | |
| 
 | |
async def get_routing_table_impl(
    api: Api,
    impls_by_provider_id: Dict[str, RoutedProtocol],
    _deps,
    dist_registry: DistributionRegistry,
) -> Any:
    """Create and initialize the routing table registered for ``api``.

    Args:
        api: API whose ``value`` selects the routing table class.
        impls_by_provider_id: Mapping handed unchanged to the routing table
            constructor (keyed by provider id, going by the parameter name).
        _deps: Not used by this function; kept as part of the signature.
        dist_registry: Registry handed unchanged to the routing table
            constructor.

    Returns:
        The routing table instance, after its ``initialize()`` coroutine has
        been awaited.

    Raises:
        ValueError: If ``api.value`` has no routing table class in the map.
    """
    table_classes = {
        "vector_dbs": VectorDBsRoutingTable,
        "models": ModelsRoutingTable,
        "shields": ShieldsRoutingTable,
        "datasets": DatasetsRoutingTable,
        "scoring_functions": ScoringFunctionsRoutingTable,
        "benchmarks": BenchmarksRoutingTable,
        "tool_groups": ToolGroupsRoutingTable,
    }

    table_cls = table_classes.get(api.value)
    if table_cls is None:
        raise ValueError(f"API {api.value} not found in router map")

    table = table_cls(impls_by_provider_id, dist_registry)
    await table.initialize()
    return table
 | |
| 
 | |
| 
 | |
async def get_auto_router_impl(api: Api, routing_table: RoutingTable, deps: Dict[str, Any]) -> Any:
    """Create and initialize the router registered for ``api``.

    Args:
        api: API whose ``value`` selects the router class.
        routing_table: Routing table passed as the router's first positional
            constructor argument.
        deps: Already-built implementations. Entries are looked up by ``Api``
            member (e.g. ``Api.telemetry``) and are optional: a missing entry
            is simply not passed to the router.
            NOTE(review): the annotation says ``str`` keys while the lookup
            uses ``Api`` members -- confirm which is intended.

    Returns:
        The router instance, after its ``initialize()`` coroutine has been
        awaited.

    Raises:
        ValueError: If ``api.value`` has no router class in the map.
    """
    # Function-scope import, kept where the original placed it -- presumably to
    # avoid an import cycle; confirm before hoisting to module level.
    from .routers import (
        DatasetIORouter,
        EvalRouter,
        InferenceRouter,
        SafetyRouter,
        ScoringRouter,
        ToolRuntimeRouter,
        VectorIORouter,
    )

    router_classes = {
        "vector_io": VectorIORouter,
        "inference": InferenceRouter,
        "safety": SafetyRouter,
        "datasetio": DatasetIORouter,
        "scoring": ScoringRouter,
        "eval": EvalRouter,
        "tool_runtime": ToolRuntimeRouter,
    }
    # Per API: constructor keyword -> Api member whose implementation, when
    # present in ``deps``, is injected under that keyword.
    optional_deps = {
        "inference": {"telemetry": Api.telemetry},
    }

    router_cls = router_classes.get(api.value)
    if router_cls is None:
        raise ValueError(f"API {api.value} not found in router map")

    extra_kwargs = {
        kwarg: deps[dep_api]
        for kwarg, dep_api in optional_deps.get(api.value, {}).items()
        if dep_api in deps
    }

    router = router_cls(routing_table, **extra_kwargs)
    await router.initialize()
    return router
 |