Mirror of https://github.com/meta-llama/llama-stack.git
Synced 2025-07-28 15:02:37 +00:00
Working
This commit is contained in:
parent cdadf0f87d
commit 7bbce6394a
2 changed files with 10 additions and 3 deletions
@@ -10,7 +10,6 @@ from llama_models.schema_utils import json_schema_type

 from pydantic import BaseModel, Field


 # TODO: Any other engine configs
 @json_schema_type
 class VLLMImplConfig(BaseModel):
     url: Optional[str] = Field(
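The hunk above truncates the Field(...) call on the config's url attribute. As a minimal sketch of the surrounding class, assuming a None default and a descriptive string (both hypothetical; neither appears in this diff):

from typing import Optional

from llama_models.schema_utils import json_schema_type
from pydantic import BaseModel, Field


@json_schema_type
class VLLMImplConfig(BaseModel):
    url: Optional[str] = Field(
        default=None,  # assumed default; the diff cuts off the Field(...) call
        description="URL of a running vLLM server",  # hypothetical wording
    )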
@@ -29,7 +29,8 @@ from .config import VLLMImplConfig

 # Reference: https://docs.vllm.ai/en/latest/models/supported_models.html
 VLLM_SUPPORTED_MODELS = {
-    "Llama3.1-70B-Instruct": "meta-llama/Meta-Llama-3-70B-Instruct",
     "Llama3.1-8B-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
+    "Llama3.1-70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct",
+    "Llama3.1-405B-Instruct": "meta-llama/Meta-Llama-3.1-405B-Instruct",
 }
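For context on how this table is used: the adapter class (see the next hunk) inherits ModelRegistryHelper, which maps the Llama Stack alias on the left to the Hugging Face repo id that vLLM serves on the right. A minimal sketch of that lookup, with resolve_hf_repo as a hypothetical stand-in for the helper's logic:

VLLM_SUPPORTED_MODELS = {
    "Llama3.1-8B-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "Llama3.1-70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct",
    "Llama3.1-405B-Instruct": "meta-llama/Meta-Llama-3.1-405B-Instruct",
}


def resolve_hf_repo(alias: str) -> str:
    # Hypothetical helper: ModelRegistryHelper performs this lookup in the
    # real adapter; reduced here to a plain dict access for illustration.
    try:
        return VLLM_SUPPORTED_MODELS[alias]
    except KeyError:
        raise ValueError(f"Unknown model alias: {alias}") from None


assert resolve_hf_repo("Llama3.1-70B-Instruct") == "meta-llama/Meta-Llama-3.1-70B-Instruct"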
@@ -48,7 +49,14 @@ class VLLMInferenceAdapter(ModelRegistryHelper, Inference):

     async def shutdown(self) -> None:
         pass

-    def completion(self, request: CompletionRequest) -> AsyncGenerator:
+    def completion(
+        self,
+        model: str,
+        content: InterleavedTextMedia,
+        sampling_params: Optional[SamplingParams] = SamplingParams(),
+        stream: Optional[bool] = False,
+        logprobs: Optional[LogProbConfig] = None,
+    ) -> Union[CompletionResponse, CompletionResponseStreamChunk]:
         raise NotImplementedError()

     def chat_completion(
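This hunk replaces the single CompletionRequest argument with the flattened request fields used elsewhere in the inference API. A hedged caller-side sketch of the new shape; adapter and SamplingParams are assumed to come from the llama-stack inference API, and since the body still raises NotImplementedError in this commit, the call only illustrates the signature:

# Hypothetical call site; nothing below actually runs inference yet.
response = adapter.completion(
    model="Llama3.1-8B-Instruct",
    content="Write a haiku about GPUs.",
    sampling_params=SamplingParams(),  # defaults, per the new signature
    stream=False,
    logprobs=None,
)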