formatting

Dalton Flanagan 2024-08-14 14:22:25 -04:00
parent 94dfa293a6
commit b6ccaf1778
33 changed files with 110 additions and 97 deletions

@@ -97,27 +97,30 @@ class BatchChatCompletionResponse(BaseModel):
 class Inference(Protocol):
     @webmethod(route="/inference/completion")
     async def completion(
         self,
         request: CompletionRequest,
-    ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ...
+    ) -> Union[CompletionResponse, CompletionResponseStreamChunk]:
+        ...

     @webmethod(route="/inference/chat_completion")
     async def chat_completion(
         self,
         request: ChatCompletionRequest,
-    ) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...
+    ) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]:
+        ...

     @webmethod(route="/inference/batch_completion")
     async def batch_completion(
         self,
         request: BatchCompletionRequest,
-    ) -> BatchCompletionResponse: ...
+    ) -> BatchCompletionResponse:
+        ...

     @webmethod(route="/inference/batch_chat_completion")
     async def batch_chat_completion(
         self,
         request: BatchChatCompletionRequest,
-    ) -> BatchChatCompletionResponse: ...
+    ) -> BatchChatCompletionResponse:
+        ...
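For context on this hunk: `...` (the Ellipsis literal) is the conventional no-op body for `typing.Protocol` methods, and the reformat above only moves it from the signature line onto its own line. Below is a minimal, self-contained sketch of that pattern; the `Echo`/`EchoRequest` names are hypothetical stand-ins, not the llama_toolchain API.

    import asyncio
    from typing import Protocol

    class EchoRequest:
        def __init__(self, text: str) -> None:
            self.text = text

    class Echo(Protocol):
        # `...` on its own line is the entire method "body": a Protocol
        # member declares a signature only; implementers supply behavior.
        async def echo(
            self,
            request: EchoRequest,
        ) -> str:
            ...

    class UpperEcho:
        # Structurally satisfies Echo; no inheritance is required.
        async def echo(self, request: EchoRequest) -> str:
            return request.text.upper()

    print(asyncio.run(UpperEcho().echo(EchoRequest("hi"))))  # prints "HI"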

@@ -7,8 +7,8 @@
 from termcolor import cprint
 from llama_toolchain.inference.api import (
-    ChatCompletionResponseEventType,
-    ChatCompletionResponseStreamChunk
+    ChatCompletionResponseEventType,
+    ChatCompletionResponseStreamChunk,
 )

@@ -45,7 +45,6 @@ SEMAPHORE = asyncio.Semaphore(1)
 class MetaReferenceInferenceImpl(Inference):
     def __init__(self, config: MetaReferenceImplConfig) -> None:
         self.config = config
-        model = resolve_model(config.model)

@@ -54,7 +54,6 @@ async def get_provider_impl(
 class OllamaInference(Inference):
     def __init__(self, config: OllamaImplConfig) -> None:
         self.config = config
@@ -66,7 +65,9 @@ class OllamaInference(Inference):
         try:
             await self.client.ps()
         except httpx.ConnectError:
-            raise RuntimeError("Ollama Server is not running, start it using `ollama serve` in a separate terminal")
+            raise RuntimeError(
+                "Ollama Server is not running, start it using `ollama serve` in a separate terminal"
+            )

     async def shutdown(self) -> None:
         pass
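
The Ollama hunk wraps a fail-fast health check: if pinging the server raises `httpx.ConnectError`, the provider surfaces an actionable error instead of failing later on the first request. Here is a standalone sketch of the same check using httpx directly; the `OLLAMA_URL` address (default port 11434) and the bare GET against the server root are assumptions of this sketch, not taken from the diff.

    import asyncio

    import httpx

    OLLAMA_URL = "http://localhost:11434"  # assumed default Ollama address

    async def ensure_ollama_running() -> None:
        # Same pattern as the diff: a ConnectError means the server is
        # unreachable, so raise a RuntimeError that tells the user how to fix it.
        try:
            async with httpx.AsyncClient() as client:
                await client.get(OLLAMA_URL)
        except httpx.ConnectError:
            raise RuntimeError(
                "Ollama Server is not running, start it using `ollama serve` in a separate terminal"
            )

    if __name__ == "__main__":
        asyncio.run(ensure_ollama_running())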