commit a08fd8f331 (parent 8d41e6caa9)
Repository: https://github.com/meta-llama/llama-stack.git

Add boilerplate for vllm inference provider

Signed-off-by: Russell Bryant <rbryant@redhat.com>

4 changed files with 49 additions and 5 deletions
@@ -1,5 +0,0 @@ (file path not captured in this view)
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
llama_stack/providers/impls/vllm/__init__.py (new file, 11 additions)
@@ -0,0 +1,11 @@
+from typing import Any
+
+from .config import VLLMConfig
+
+
+async def get_provider_impl(config: VLLMConfig, _deps) -> Any:
+    from .vllm import VLLMInferenceImpl
+
+    impl = VLLMInferenceImpl(config)
+    await impl.initialize()
+    return impl
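Note the lazy `from .vllm import VLLMInferenceImpl` inside the factory, which keeps importing the package cheap until the provider is actually instantiated. For context, a minimal sketch of driving this factory by hand; in the stack itself the provider registry resolves the config and dependencies and calls get_provider_impl, and the empty _deps dict plus the top-level script below are assumptions for illustration only:

import asyncio

from llama_stack.providers.impls.vllm import get_provider_impl
from llama_stack.providers.impls.vllm.config import VLLMConfig


async def main():
    # Build the (currently empty) config and let the factory construct and
    # initialize the implementation; deps are unused by this provider.
    impl = await get_provider_impl(VLLMConfig(), _deps={})
    print(type(impl).__name__)  # VLLMInferenceImpl


asyncio.run(main())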
llama_stack/providers/impls/vllm/config.py (new file, 5 additions)
@@ -0,0 +1,5 @@
+from pydantic import BaseModel
+
+
+class VLLMConfig(BaseModel):
+    pass
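The config is deliberately an empty pydantic model in this boilerplate commit. As a rough sketch of where it could go, a fleshed-out version might expose engine settings along these lines; the field names below are hypothetical and not part of this change:

from pydantic import BaseModel, Field


class VLLMConfig(BaseModel):
    # Hypothetical fields for illustration only; this commit ships an empty model.
    model: str = Field(default="Llama3.1-8B-Instruct", description="Model to load into vLLM")
    tensor_parallel_size: int = Field(default=1, description="GPUs to shard the model across")
    max_tokens: int = Field(default=4096, description="Default cap on generated tokens")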
llama_stack/providers/impls/vllm/vllm.py (new file, 33 additions)
@@ -0,0 +1,33 @@
+import logging
+from typing import Any
+
+from llama_stack.apis.inference.inference import CompletionResponse, CompletionResponseStreamChunk, LogProbConfig, ChatCompletionResponse, ChatCompletionResponseStreamChunk, EmbeddingsResponse
+from llama_stack.apis.inference import Inference
+
+from .config import VLLMConfig
+
+from llama_models.llama3.api.datatypes import InterleavedTextMedia, Message, ToolChoice, ToolDefinition, ToolPromptFormat
+
+
+log = logging.getLogger(__name__)
+
+
+class VLLMInferenceImpl(Inference):
+    """Inference implementation for vLLM."""
+    def __init__(self, config: VLLMConfig):
+        self.config = config
+
+    async def initialize(self):
+        log.info("Initializing vLLM inference adapter")
+        pass
+
+    async def completion(self, model: str, content: InterleavedTextMedia, sampling_params: Any | None = ..., stream: bool | None = False, logprobs: LogProbConfig | None = None) -> CompletionResponse | CompletionResponseStreamChunk:
+        log.info("vLLM completion")
+        return None
+
+    async def chat_completion(self, model: str, messages: list[Message], sampling_params: Any | None = ..., tools: list[ToolDefinition] | None = ..., tool_choice: ToolChoice | None = ..., tool_prompt_format: ToolPromptFormat | None = ..., stream: bool | None = False, logprobs: LogProbConfig | None = None) -> ChatCompletionResponse | ChatCompletionResponseStreamChunk:
+        log.info("vLLM chat completion")
+        return None
+
+    async def embeddings(self, model: str, contents: list[InterleavedTextMedia]) -> EmbeddingsResponse:
+        log.info("vLLM embeddings")
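Since every method is a stub that only logs and returns None, the provider can already be exercised end to end. A minimal sketch, assuming UserMessage from llama_models.llama3.api.datatypes and a model name chosen purely for illustration:

import asyncio

from llama_models.llama3.api.datatypes import UserMessage

from llama_stack.providers.impls.vllm.config import VLLMConfig
from llama_stack.providers.impls.vllm.vllm import VLLMInferenceImpl


async def main():
    impl = VLLMInferenceImpl(VLLMConfig())
    await impl.initialize()

    # The stub just logs and returns None; a follow-up commit would connect it
    # to a vLLM engine and return a real ChatCompletionResponse.
    response = await impl.chat_completion(
        model="Llama3.1-8B-Instruct",
        messages=[UserMessage(content="Hello")],
    )
    print(response)  # None for now


asyncio.run(main())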