mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-28 02:53:30 +00:00
# What does this PR do? This PR adds back the changes in #1300, which were reverted in #1476. It also adds logic to preserve context variables across the asyncio boundary. This is needed with the library client because the async generator logic yields control to code outside the event loop and, on resuming, no longer has the same context as before, so the context vars have to be preserved explicitly. Addresses #1477. ## Test Plan ``` curl --request POST \ --url http://localhost:8321/v1/inference/chat-completion \ --header 'content-type: application/json' \ --data '{ "model_id": "meta-llama/Llama-3.1-70B-Instruct", "messages": [ { "role": "user", "content": { "type": "text", "text": "where do humans live" } } ], "stream": false }' | jq . { "metrics": [ { "trace_id": "kCZwO3tyQC-FuAGb", "span_id": "bsP_5a5O", "timestamp": "2025-03-11T16:47:38.549084Z", "attributes": { "model_id": "meta-llama/Llama-3.1-70B-Instruct", "provider_id": "fireworks" }, "type": "metric", "metric": "prompt_tokens", "value": 10, "unit": "tokens" }, { "trace_id": "kCZwO3tyQC-FuAGb", "span_id": "bsP_5a5O", "timestamp": "2025-03-11T16:47:38.549449Z", "attributes": { "model_id": "meta-llama/Llama-3.1-70B-Instruct", "provider_id": "fireworks" }, "type": "metric", "metric": "completion_tokens", "value": 369, "unit": "tokens" }, { "trace_id": "kCZwO3tyQC-FuAGb", "span_id": "bsP_5a5O", "timestamp": "2025-03-11T16:47:38.549457Z", "attributes": { "model_id": "meta-llama/Llama-3.1-70B-Instruct", "provider_id": "fireworks" }, "type": "metric", "metric": "total_tokens", "value": 379, "unit": "tokens" } ], "completion_message": { "role": "assistant", "content": "Humans live on the planet Earth, specifically on its landmasses and in its oceans. Here's a breakdown of where humans live:\n\n1. **Continents:** Humans inhabit all seven continents:\n\t* Africa\n\t* Antarctica ( temporary residents, mostly scientists and researchers)\n\t* Asia\n\t* Australia\n\t* Europe\n\t* North America\n\t* South America\n2. 
**Countries:** There are 196 countries recognized by the United Nations, and humans live in almost all of them.\n3. **Cities and towns:** Many humans live in urban areas, such as cities and towns, which are often located near coastlines, rivers, or other bodies of water.\n4. **Rural areas:** Some humans live in rural areas, such as villages, farms, and countryside.\n5. **Islands:** Humans inhabit many islands around the world, including those in the Pacific, Indian, and Atlantic Oceans.\n6. **Mountains and highlands:** Humans live in mountainous regions, such as the Himalayas, the Andes, and the Rocky Mountains.\n7. **Deserts:** Some humans live in desert regions, such as the Sahara, the Mojave, and the Atacama.\n8. **Coastal areas:** Many humans live in coastal areas, such as beaches, ports, and coastal cities.\n9. **Underwater habitats:** A few humans live in underwater habitats, such as research stations and submarines.\n10. **Space:** A small number of humans have lived in space, including astronauts on the International Space Station and those who have visited the Moon.\n\nOverall, humans can be found living in almost every environment on Earth, from the frozen tundra to the hottest deserts, and from the highest mountains to the deepest oceans.", "stop_reason": "end_of_turn", "tool_calls": [] }, "logprobs": null } ``` Original repro no longer shows any error: ``` LLAMA_STACK_DISABLE_VERSION_CHECK=true llama stack run ~/.llama/distributions/fireworks/fireworks-run.yaml python -m examples.agents.e2e_loop_with_client_tools localhost 8321 ``` client logs: https://gist.github.com/dineshyv/047c7e87b18a5792aa660e311ea53166 server logs: https://gist.github.com/dineshyv/97a2174099619e9916c7c490be26e559
86 lines
2.6 KiB
Python
86 lines
2.6 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the terms described in the LICENSE file in
|
|
# the root directory of this source tree.
|
|
|
|
import contextvars
|
|
import json
|
|
import logging
|
|
from typing import Any, ContextManager, Dict, Optional
|
|
|
|
from .utils.dynamic import instantiate_class_type
|
|
|
|
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Context variable holding the provider data for the current request.
# Reading it yields None until a value has been set.
PROVIDER_DATA_VAR = contextvars.ContextVar("provider_data", default=None)
|
|
|
|
|
|
class RequestProviderDataContext(ContextManager):
    """Context manager that scopes request provider data to a ``with`` block.

    On entry the supplied provider data is stored in ``PROVIDER_DATA_VAR``;
    on exit the value that was visible before entry is restored. Exceptions
    raised inside the block are never suppressed.

    Args:
        provider_data: Value to publish through ``PROVIDER_DATA_VAR`` while
            the context is active. ``None`` is stored as-is.
    """

    def __init__(self, provider_data: Optional[Dict[str, Any]] = None):
        self.provider_data = provider_data
        # Token returned by ContextVar.set(); None whenever the context is
        # not currently entered.
        self.token = None

    def __enter__(self):
        """Publish the provider data and return this context manager."""
        # The token remembers the previous value so __exit__ can restore it.
        self.token = PROVIDER_DATA_VAR.set(self.provider_data)
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Restore the value that was in place before ``__enter__``."""
        # Detach the token *before* resetting: a ContextVar token may only be
        # used once, so keeping a spent token around would make a second
        # __exit__ call on this instance raise RuntimeError.
        token, self.token = self.token, None
        if token is not None:
            PROVIDER_DATA_VAR.reset(token)
|
|
|
|
|
|
class NeedsRequestProviderData:
    """Helper class exposing the validated provider data of the current request.

    The instance is expected to carry a ``__provider_spec__`` attribute; it is
    read here but not assigned anywhere in this class.
    """

    def get_request_provider_data(self) -> Any:
        """Return the current request's provider data, run through the validator.

        Returns:
            The result of calling the provider's validator with the stored
            provider data as keyword arguments, or ``None`` when no provider
            data is set or the validator call raises (the error is logged).

        Raises:
            AssertionError: If ``__provider_spec__`` is falsy.
            ValueError: If the spec has no ``provider_data_validator``.
        """
        spec = self.__provider_spec__
        assert spec, f"Provider spec not set on {self.__class__}"

        provider_type = spec.provider_type
        validator_ref = spec.provider_data_validator
        if not validator_ref:
            raise ValueError(f"Provider {provider_type} does not have a validator")

        raw = PROVIDER_DATA_VAR.get()
        if raw:
            # NOTE(review): instantiate_class_type presumably resolves the
            # validator reference to a callable class -- confirm in utils.dynamic.
            build = instantiate_class_type(validator_ref)
            try:
                return build(**raw)
            except Exception as e:
                # Best effort: malformed provider data is logged, not raised.
                log.error(f"Error parsing provider data: {e}")
        return None
|
|
|
|
|
|
def parse_request_provider_data(headers: Dict[str, str]) -> Optional[Dict[str, Any]]:
    """Parse provider data from request headers.

    The ``X-LlamaStack-Provider-Data`` header (also looked up in its
    all-lowercase spelling) is expected to carry a JSON object.

    Args:
        headers: Mapping of request header names to values.

    Returns:
        The decoded JSON object, or ``None`` when the header is absent or
        empty, is not valid JSON, or decodes to anything other than an
        object. The last two cases are logged as errors.
    """
    keys = [
        "X-LlamaStack-Provider-Data",
        "x-llamastack-provider-data",
    ]
    # The first spelling that yields a non-empty value wins.
    val = next((v for v in (headers.get(key) for key in keys) if v), None)
    if not val:
        return None

    try:
        parsed = json.loads(val)
    except json.JSONDecodeError:
        log.error("Provider data not encoded as a JSON object!")
        return None

    # Valid JSON that is not an object (array, string, number, ...) would
    # break the declared return type and any caller that unpacks the result
    # as keyword arguments, so it is rejected the same way as malformed JSON.
    if not isinstance(parsed, dict):
        log.error("Provider data not encoded as a JSON object!")
        return None
    return parsed
|
|
|
|
|
|
def request_provider_data_context(headers: Dict[str, str]) -> ContextManager:
    """Build a context manager that publishes the provider data found in *headers*.

    The headers are parsed with ``parse_request_provider_data`` and the result
    (possibly ``None``) is wrapped in a ``RequestProviderDataContext``, which
    makes it current for the duration of the ``with`` block.
    """
    return RequestProviderDataContext(parse_request_provider_data(headers))
|