mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-31 16:01:46 +00:00
Implemented health check for cloud client
This commit is contained in:
parent
022f20e710
commit
cc29fc0fe8
1 changed files with 25 additions and 25 deletions
|
@ -28,7 +28,7 @@
|
||||||
"id": "bfac8382",
|
"id": "bfac8382",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"### 0. Configuration\n",
|
"### 1. Configuration\n",
|
||||||
"Set up your connection parameters:"
|
"Set up your connection parameters:"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
@ -49,7 +49,7 @@
|
||||||
"id": "df89cff7",
|
"id": "df89cff7",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"#### 1. Set Up Local and Cloud Clients\n",
|
"#### 2. Set Up Local and Cloud Clients\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Initialize both clients, specifying the `base_url` for each instance. In this case, we have the local distribution running on `http://localhost:5000` and the cloud distribution running on `http://localhost:5001`.\n"
|
"Initialize both clients, specifying the `base_url` for each instance. In this case, we have the local distribution running on `http://localhost:5000` and the cloud distribution running on `http://localhost:5001`.\n"
|
||||||
]
|
]
|
||||||
|
@ -73,7 +73,7 @@
|
||||||
"id": "894689c1",
|
"id": "894689c1",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"#### 2. Client Selection with Fallback\n",
|
"#### 3. Client Selection with Fallback\n",
|
||||||
"\n",
|
"\n",
|
||||||
"The `select_client` function checks if the local client is available using a lightweight `/health` check. If the local client is unavailable, it automatically switches to the cloud client.\n"
|
"The `select_client` function checks if the local client is available using a lightweight `/health` check. If the local client is unavailable, it automatically switches to the cloud client.\n"
|
||||||
]
|
]
|
||||||
|
@ -96,28 +96,28 @@
|
||||||
"import httpx\n",
|
"import httpx\n",
|
||||||
"from termcolor import cprint\n",
|
"from termcolor import cprint\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"async def check_client_health(client, client_name: str) -> bool:\n",
|
||||||
|
" try:\n",
|
||||||
|
" async with httpx.AsyncClient() as http_client:\n",
|
||||||
|
" response = await http_client.get(f'{client.base_url}/health')\n",
|
||||||
|
" if response.status_code == 200:\n",
|
||||||
|
" cprint(f'Using {client_name} client.', 'yellow')\n",
|
||||||
|
" return True\n",
|
||||||
|
" else:\n",
|
||||||
|
" cprint(f'{client_name} client health check failed.', 'red')\n",
|
||||||
|
" return False\n",
|
||||||
|
" except httpx.RequestError:\n",
|
||||||
|
" cprint(f'Failed to connect to {client_name} client.', 'red')\n",
|
||||||
|
" return False\n",
|
||||||
|
"\n",
|
||||||
"async def select_client(use_local: bool) -> LlamaStackClient:\n",
|
"async def select_client(use_local: bool) -> LlamaStackClient:\n",
|
||||||
" \"\"\"\n",
|
" if use_local and await check_client_health(local_client, 'local'):\n",
|
||||||
" Selects the client based on the use_local flag.\n",
|
" return local_client\n",
|
||||||
"\n",
|
"\n",
|
||||||
" Parameters:\n",
|
" if await check_client_health(cloud_client, 'cloud'):\n",
|
||||||
" - use_local: bool, True to try the local client, False to use the cloud client.\n",
|
" return cloud_client\n",
|
||||||
"\n",
|
"\n",
|
||||||
" Returns:\n",
|
" raise ConnectionError('Unable to connect to any client.')\n",
|
||||||
" - LlamaStackClient: the selected client instance.\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" if use_local:\n",
|
|
||||||
" try:\n",
|
|
||||||
" async with httpx.AsyncClient() as http_client:\n",
|
|
||||||
" response = await http_client.get(f'{local_client.base_url}/health')\n",
|
|
||||||
" if response.status_code == 200:\n",
|
|
||||||
" cprint('Using local client.', 'yellow')\n",
|
|
||||||
" return local_client\n",
|
|
||||||
" except httpx.RequestError:\n",
|
|
||||||
" cprint('Failed to connect to local client.', 'red')\n",
|
|
||||||
"\n",
|
|
||||||
" cprint('Using cloud client.', 'yellow')\n",
|
|
||||||
" return cloud_client\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"# Example usage: pass True for local, False for cloud\n",
|
"# Example usage: pass True for local, False for cloud\n",
|
||||||
"client = await select_client(use_local=True)\n"
|
"client = await select_client(use_local=True)\n"
|
||||||
|
@ -128,7 +128,7 @@
|
||||||
"id": "9ccfe66f",
|
"id": "9ccfe66f",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"#### 3. Generate a Response\n",
|
"#### 4. Generate a Response\n",
|
||||||
"\n",
|
"\n",
|
||||||
"After selecting the client, you can generate text using `chat_completion`. This example sends a sample prompt to the model and prints the response.\n"
|
"After selecting the client, you can generate text using `chat_completion`. This example sends a sample prompt to the model and prints the response.\n"
|
||||||
]
|
]
|
||||||
|
@ -169,7 +169,7 @@
|
||||||
"id": "6edf5e57",
|
"id": "6edf5e57",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"#### 4. Run with Cloud Model\n",
|
"#### 5. Run with Cloud Model\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Use `asyncio.run()` to execute `get_llama_response` in an asynchronous event loop.\n"
|
"Use `asyncio.run()` to execute `get_llama_response` in an asynchronous event loop.\n"
|
||||||
]
|
]
|
||||||
|
@ -206,7 +206,7 @@
|
||||||
"id": "5c433511-9321-4718-ab7f-e21cf6b5ca79",
|
"id": "5c433511-9321-4718-ab7f-e21cf6b5ca79",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"#### 4. Run with Local Model\n"
|
"#### 6. Run with Local Model\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue