Merge branch 'main' into prompt-api

This commit is contained in:
Francisco Arceo 2025-09-04 21:48:57 -06:00 committed by GitHub
commit 14f7b0d843
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 92 additions and 13 deletions

View file

@ -527,7 +527,7 @@ class InferenceRouter(Inference):
# Store the response with the ID that will be returned to the client
if self.store:
await self.store.store_chat_completion(response, messages)
asyncio.create_task(self.store.store_chat_completion(response, messages))
if self.telemetry:
metrics = self._construct_metrics(
@ -855,4 +855,4 @@ class InferenceRouter(Inference):
object="chat.completion",
)
logger.debug(f"InferenceRouter.completion_response: {final_response}")
await self.store.store_chat_completion(final_response, messages)
asyncio.create_task(self.store.store_chat_completion(final_response, messages))

View file

@ -141,6 +141,8 @@ def translate_exception(exc: Exception) -> HTTPException | RequestValidationErro
return HTTPException(status_code=httpx.codes.BAD_REQUEST, detail=str(exc))
elif isinstance(exc, PermissionError | AccessDeniedError):
return HTTPException(status_code=httpx.codes.FORBIDDEN, detail=f"Permission denied: {str(exc)}")
elif isinstance(exc, ConnectionError | httpx.ConnectError):
return HTTPException(status_code=httpx.codes.BAD_GATEWAY, detail=str(exc))
elif isinstance(exc, asyncio.TimeoutError | TimeoutError):
return HTTPException(status_code=httpx.codes.GATEWAY_TIMEOUT, detail=f"Operation timed out: {str(exc)}")
elif isinstance(exc, NotImplementedError):