mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-23 03:22:26 +00:00
improvements to include other ollama methods like ps, list and pull
This commit is contained in:
parent
1a21c4b695
commit
b578f9aec1
23 changed files with 6748 additions and 5711 deletions
|
|
@@ -77,13 +77,13 @@ class InferenceRouter(Inference):
|
|||
self.tokenizer = Tokenizer.get_instance()
|
||||
self.formatter = ChatFormat(self.tokenizer)
|
||||
|
||||
self.recording_context = None
|
||||
|
||||
async def initialize(self) -> None:
|
||||
logger.debug("InferenceRouter.initialize")
|
||||
pass
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
logger.debug("InferenceRouter.shutdown")
|
||||
pass
|
||||
|
||||
async def register_model(
|
||||
self,
|
||||
|
|
|
|||
|
|
@@ -94,6 +94,7 @@ RESOURCES = [
|
|||
|
||||
REGISTRY_REFRESH_INTERVAL_SECONDS = 300
|
||||
REGISTRY_REFRESH_TASK = None
|
||||
TEST_RECORDING_CONTEXT = None
|
||||
|
||||
|
||||
async def register_resources(run_config: StackRunConfig, impls: dict[Api, Any]):
|
||||
|
|
@@ -307,6 +308,15 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf
|
|||
async def construct_stack(
|
||||
run_config: StackRunConfig, provider_registry: ProviderRegistry | None = None
|
||||
) -> dict[Api, Any]:
|
||||
inference_mode = os.environ.get("LLAMA_STACK_TEST_INFERENCE_MODE", "live").lower()
|
||||
if inference_mode in ["record", "replay"]:
|
||||
global TEST_RECORDING_CONTEXT
|
||||
from llama_stack.testing.inference_recorder import setup_inference_recording
|
||||
|
||||
TEST_RECORDING_CONTEXT = setup_inference_recording()
|
||||
TEST_RECORDING_CONTEXT.__enter__()
|
||||
logger.info(f"Inference recording enabled: mode={inference_mode}")
|
||||
|
||||
dist_registry, _ = await create_dist_registry(run_config.metadata_store, run_config.image_name)
|
||||
policy = run_config.server.auth.access_policy if run_config.server.auth else []
|
||||
impls = await resolve_impls(
|
||||
|
|
@@ -352,6 +362,13 @@ async def shutdown_stack(impls: dict[Api, Any]):
|
|||
except (Exception, asyncio.CancelledError) as e:
|
||||
logger.exception(f"Failed to shutdown {impl_name}: {e}")
|
||||
|
||||
global TEST_RECORDING_CONTEXT
|
||||
if TEST_RECORDING_CONTEXT:
|
||||
try:
|
||||
TEST_RECORDING_CONTEXT.__exit__(None, None, None)
|
||||
except Exception as e:
|
||||
logger.error(f"Error during inference recording cleanup: {e}")
|
||||
|
||||
global REGISTRY_REFRESH_TASK
|
||||
if REGISTRY_REFRESH_TASK:
|
||||
REGISTRY_REFRESH_TASK.cancel()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue