forked from phoenix-oss/llama-stack-mirror
formatting
parent 94dfa293a6 · commit b6ccaf1778
33 changed files with 110 additions and 97 deletions
@@ -97,27 +97,30 @@ class BatchChatCompletionResponse(BaseModel):

 class Inference(Protocol):

     @webmethod(route="/inference/completion")
     async def completion(
         self,
         request: CompletionRequest,
-    ) -> Union[CompletionResponse, CompletionResponseStreamChunk]: ...
+    ) -> Union[CompletionResponse, CompletionResponseStreamChunk]:
+        ...

     @webmethod(route="/inference/chat_completion")
     async def chat_completion(
         self,
         request: ChatCompletionRequest,
-    ) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...
+    ) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]:
+        ...

     @webmethod(route="/inference/batch_completion")
     async def batch_completion(
         self,
         request: BatchCompletionRequest,
-    ) -> BatchCompletionResponse: ...
+    ) -> BatchCompletionResponse:
+        ...

     @webmethod(route="/inference/batch_chat_completion")
     async def batch_chat_completion(
         self,
         request: BatchChatCompletionRequest,
-    ) -> BatchChatCompletionResponse: ...
+    ) -> BatchChatCompletionResponse:
+        ...
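The hunk above is purely cosmetic: each protocol stub's `...` body moves off the signature line onto its own line, which changes nothing at runtime or for type checking. Below is a minimal, self-contained sketch of the same `typing.Protocol` pattern; the `EchoInference` class and the dataclass stand-ins are hypothetical and not from the repo, and the real request/response models plus the `@webmethod` decorator live in `llama_toolchain.inference.api`.

    # Sketch only: stand-in types, not the repo's actual models.
    from dataclasses import dataclass
    from typing import Protocol


    @dataclass
    class CompletionRequest:
        content: str


    @dataclass
    class CompletionResponse:
        completion: str


    class Inference(Protocol):
        async def completion(
            self,
            request: CompletionRequest,
        ) -> CompletionResponse:
            ...  # stub body on its own line, exactly as after this commit


    class EchoInference:
        """Hypothetical provider that structurally satisfies the protocol."""

        async def completion(self, request: CompletionRequest) -> CompletionResponse:
            return CompletionResponse(completion=request.content)


    impl: Inference = EchoInference()  # accepted by a type checker: the signatures match

Because `Protocol` uses structural typing, a provider does not strictly need to inherit from `Inference`, although the implementations further down in this diff do subclass it directly.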
@@ -7,8 +7,8 @@
 from termcolor import cprint

 from llama_toolchain.inference.api import (
-    ChatCompletionResponseEventType,
-    ChatCompletionResponseStreamChunk
+    ChatCompletionResponseEventType,
+    ChatCompletionResponseStreamChunk,
 )

@@ -45,7 +45,6 @@ SEMAPHORE = asyncio.Semaphore(1)


 class MetaReferenceInferenceImpl(Inference):
-
     def __init__(self, config: MetaReferenceImplConfig) -> None:
         self.config = config
         model = resolve_model(config.model)
@@ -54,7 +54,6 @@ async def get_provider_impl(


 class OllamaInference(Inference):
-
     def __init__(self, config: OllamaImplConfig) -> None:
         self.config = config

@@ -66,7 +65,9 @@ class OllamaInference(Inference):
         try:
             await self.client.ps()
         except httpx.ConnectError:
-            raise RuntimeError("Ollama Server is not running, start it using `ollama serve` in a separate terminal")
+            raise RuntimeError(
+                "Ollama Server is not running, start it using `ollama serve` in a separate terminal"
+            )

     async def shutdown(self) -> None:
         pass
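The rewrapped `raise` keeps behavior identical: if the provider cannot reach the local Ollama server during startup, the `httpx.ConnectError` is converted into a descriptive `RuntimeError`. Here is a standalone sketch of that guard, assuming Ollama's default address of http://localhost:11434 and probing the server's root endpoint with plain `httpx` instead of the provider's own `self.client.ps()` call.

    # Sketch only: a startup probe for a local Ollama server.
    import asyncio

    import httpx

    OLLAMA_URL = "http://localhost:11434"  # default address used by `ollama serve`


    async def check_ollama_running() -> None:
        async with httpx.AsyncClient(base_url=OLLAMA_URL) as client:
            try:
                await client.get("/")  # only a connection-level failure matters here
            except httpx.ConnectError:
                raise RuntimeError(
                    "Ollama Server is not running, start it using `ollama serve` in a separate terminal"
                )


    if __name__ == "__main__":
        asyncio.run(check_ollama_running())

Running the sketch while `ollama serve` is stopped reproduces the error message from the hunk.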