forked from phoenix-oss/llama-stack-mirror
Fix precommit check after moving to ruff (#927)
The lint check in the main branch is failing. This fixes the lint check after we moved to ruff in https://github.com/meta-llama/llama-stack/pull/921. We need to move to a `ruff.toml` file, as well as fix and ignore some additional checks. Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>
This commit is contained in:
parent
4773092dd1
commit
34ab7a3b6c
217 changed files with 981 additions and 2681 deletions
|
@ -55,47 +55,33 @@ class ProcessingMessageName(str, Enum):
|
|||
|
||||
|
||||
class ReadyRequest(BaseModel):
|
||||
type: Literal[ProcessingMessageName.ready_request] = (
|
||||
ProcessingMessageName.ready_request
|
||||
)
|
||||
type: Literal[ProcessingMessageName.ready_request] = ProcessingMessageName.ready_request
|
||||
|
||||
|
||||
class ReadyResponse(BaseModel):
|
||||
type: Literal[ProcessingMessageName.ready_response] = (
|
||||
ProcessingMessageName.ready_response
|
||||
)
|
||||
type: Literal[ProcessingMessageName.ready_response] = ProcessingMessageName.ready_response
|
||||
|
||||
|
||||
class EndSentinel(BaseModel):
|
||||
type: Literal[ProcessingMessageName.end_sentinel] = (
|
||||
ProcessingMessageName.end_sentinel
|
||||
)
|
||||
type: Literal[ProcessingMessageName.end_sentinel] = ProcessingMessageName.end_sentinel
|
||||
|
||||
|
||||
class CancelSentinel(BaseModel):
|
||||
type: Literal[ProcessingMessageName.cancel_sentinel] = (
|
||||
ProcessingMessageName.cancel_sentinel
|
||||
)
|
||||
type: Literal[ProcessingMessageName.cancel_sentinel] = ProcessingMessageName.cancel_sentinel
|
||||
|
||||
|
||||
class TaskRequest(BaseModel):
|
||||
type: Literal[ProcessingMessageName.task_request] = (
|
||||
ProcessingMessageName.task_request
|
||||
)
|
||||
type: Literal[ProcessingMessageName.task_request] = ProcessingMessageName.task_request
|
||||
task: Union[CompletionRequestWithRawContent, ChatCompletionRequestWithRawContent]
|
||||
|
||||
|
||||
class TaskResponse(BaseModel):
|
||||
type: Literal[ProcessingMessageName.task_response] = (
|
||||
ProcessingMessageName.task_response
|
||||
)
|
||||
type: Literal[ProcessingMessageName.task_response] = ProcessingMessageName.task_response
|
||||
result: TokenResult
|
||||
|
||||
|
||||
class ExceptionResponse(BaseModel):
|
||||
type: Literal[ProcessingMessageName.exception_response] = (
|
||||
ProcessingMessageName.exception_response
|
||||
)
|
||||
type: Literal[ProcessingMessageName.exception_response] = ProcessingMessageName.exception_response
|
||||
error: str
|
||||
|
||||
|
||||
|
@ -189,9 +175,7 @@ def retrieve_requests(reply_socket_url: str):
|
|||
group=get_model_parallel_group(),
|
||||
)
|
||||
if isinstance(updates[0], CancelSentinel):
|
||||
log.info(
|
||||
"quitting generation loop because request was cancelled"
|
||||
)
|
||||
log.info("quitting generation loop because request was cancelled")
|
||||
break
|
||||
|
||||
if mp_rank_0():
|
||||
|
@ -350,9 +334,7 @@ class ModelParallelProcessGroup:
|
|||
|
||||
def run_inference(
|
||||
self,
|
||||
req: Union[
|
||||
CompletionRequestWithRawContent, ChatCompletionRequestWithRawContent
|
||||
],
|
||||
req: Union[CompletionRequestWithRawContent, ChatCompletionRequestWithRawContent],
|
||||
) -> Generator:
|
||||
assert not self.running, "inference already running"
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue