This commit is contained in:
Yuan Tang 2024-10-10 20:58:52 -04:00
parent cdadf0f87d
commit 7bbce6394a
No known key found for this signature in database
2 changed files with 10 additions and 3 deletions

View file

@ -10,7 +10,6 @@ from llama_models.schema_utils import json_schema_type
from pydantic import BaseModel, Field
# TODO: Any other engine configs
@json_schema_type
class VLLMImplConfig(BaseModel):
url: Optional[str] = Field(

View file

@ -29,7 +29,8 @@ from .config import VLLMImplConfig
# Reference: https://docs.vllm.ai/en/latest/models/supported_models.html
# Maps each supported model name to the `meta-llama/...` identifier used for
# it. Every key must appear exactly once: in a dict literal a repeated key is
# silently overwritten by the later entry, which hides stale mappings.
VLLM_SUPPORTED_MODELS = {
    "Llama3.1-8B-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "Llama3.1-70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct",
    "Llama3.1-405B-Instruct": "meta-llama/Meta-Llama-3.1-405B-Instruct",
}
@ -48,7 +49,14 @@ class VLLMInferenceAdapter(ModelRegistryHelper, Inference):
async def shutdown(self) -> None:
    """Do nothing; this coroutine performs no work and returns ``None``."""
    return None
def completion(
    self,
    model: str,
    content: InterleavedTextMedia,
    sampling_params: Optional[SamplingParams] = SamplingParams(),
    stream: Optional[bool] = False,
    logprobs: Optional[LogProbConfig] = None,
) -> Union[CompletionResponse, CompletionResponseStreamChunk]:
    """Placeholder for plain completion; not implemented by this adapter.

    The arguments are accepted but never read: the method raises
    unconditionally.

    NOTE(review): the ``sampling_params`` default is a single
    ``SamplingParams()`` instance created when the function is defined and
    shared by every call that omits the argument. It is kept as-is so the
    signature stays unchanged -- presumably it mirrors the interface this
    adapter implements; confirm before an implementation mutates it.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError()
def chat_completion(