mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-04 20:14:13 +00:00
Add health check support so the server can be deployed on k8s
This commit is contained in:
parent
7f14fd8ecd
commit
06c6b54529
2 changed files with 13 additions and 1 deletions
|
@ -15,6 +15,7 @@ from collections.abc import (
|
||||||
AsyncIterator as AsyncIteratorABC,
|
AsyncIterator as AsyncIteratorABC,
|
||||||
)
|
)
|
||||||
from contextlib import asynccontextmanager
|
from contextlib import asynccontextmanager
|
||||||
|
from http import HTTPStatus
|
||||||
from ssl import SSLError
|
from ssl import SSLError
|
||||||
from typing import (
|
from typing import (
|
||||||
Any,
|
Any,
|
||||||
|
@ -88,7 +89,7 @@ async def global_exception_handler(request: Request, exc: Exception):
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def translate_exception(exc: Exception) -> HTTPException:
|
def translate_exception(exc: Exception) -> Union[HTTPException, RequestValidationError]:
|
||||||
if isinstance(exc, ValidationError):
|
if isinstance(exc, ValidationError):
|
||||||
exc = RequestValidationError(exc.raw_errors)
|
exc = RequestValidationError(exc.raw_errors)
|
||||||
|
|
||||||
|
@ -407,12 +408,22 @@ async def resolve_impls_with_routing(run_config: StackRunConfig) -> Dict[Api, An
|
||||||
return impls, specs
|
return impls, specs
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def main(yaml_config: str, port: int = 5000, disable_ipv6: bool = False):
|
def main(yaml_config: str, port: int = 5000, disable_ipv6: bool = False):
|
||||||
with open(yaml_config, "r") as fp:
|
with open(yaml_config, "r") as fp:
|
||||||
config = StackRunConfig(**yaml.safe_load(fp))
|
config = StackRunConfig(**yaml.safe_load(fp))
|
||||||
|
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
|
|
||||||
|
# Health check is added to enable deploying the docker container image on Kubernetes, which requires
|
||||||
|
# a health check that can return 200 for readiness and liveness checks
|
||||||
|
class HealthCheck(BaseModel):
|
||||||
|
status: str = "OK"
|
||||||
|
|
||||||
|
@app.get("/healthcheck", status_code=HTTPStatus.OK, response_model=HealthCheck)
|
||||||
|
async def healthcheck():
|
||||||
|
return HealthCheck(status="OK")
|
||||||
|
|
||||||
impls, specs = asyncio.run(resolve_impls_with_routing(config))
|
impls, specs = asyncio.run(resolve_impls_with_routing(config))
|
||||||
if Api.telemetry in impls:
|
if Api.telemetry in impls:
|
||||||
setup_logger(impls[Api.telemetry])
|
setup_logger(impls[Api.telemetry])
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
#
|
#
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
|
import pydantic
|
||||||
from together import Together
|
from together import Together
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue