mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-07-29 07:14:20 +00:00
Working
This commit is contained in:
parent
cdadf0f87d
commit
7bbce6394a
2 changed files with 10 additions and 3 deletions
|
@@ -10,7 +10,6 @@ from llama_models.schema_utils import json_schema_type
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
|
|
||||||
# TODO: Any other engine configs
|
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class VLLMImplConfig(BaseModel):
|
class VLLMImplConfig(BaseModel):
|
||||||
url: Optional[str] = Field(
|
url: Optional[str] = Field(
|
||||||
|
|
|
@@ -29,7 +29,8 @@ from .config import VLLMImplConfig
|
||||||
|
|
||||||
# Reference: https://docs.vllm.ai/en/latest/models/supported_models.html
|
# Reference: https://docs.vllm.ai/en/latest/models/supported_models.html
|
||||||
VLLM_SUPPORTED_MODELS = {
|
VLLM_SUPPORTED_MODELS = {
|
||||||
"Llama3.1-70B-Instruct": "meta-llama/Meta-Llama-3-70B-Instruct",
|
"Llama3.1-8B-Instruct": "meta-llama/Meta-Llama-3.1-8B-Instruct",
|
||||||
|
"Llama3.1-70B-Instruct": "meta-llama/Meta-Llama-3.1-70B-Instruct",
|
||||||
"Llama3.1-405B-Instruct": "meta-llama/Meta-Llama-3.1-405B-Instruct",
|
"Llama3.1-405B-Instruct": "meta-llama/Meta-Llama-3.1-405B-Instruct",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -48,7 +49,14 @@ class VLLMInferenceAdapter(ModelRegistryHelper, Inference):
|
||||||
async def shutdown(self) -> None:
|
async def shutdown(self) -> None:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def completion(self, request: CompletionRequest) -> AsyncGenerator:
|
def completion(
|
||||||
|
self,
|
||||||
|
model: str,
|
||||||
|
content: InterleavedTextMedia,
|
||||||
|
sampling_params: Optional[SamplingParams] = SamplingParams(),
|
||||||
|
stream: Optional[bool] = False,
|
||||||
|
logprobs: Optional[LogProbConfig] = None,
|
||||||
|
) -> Union[CompletionResponse, CompletionResponseStreamChunk]:
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
def chat_completion(
|
def chat_completion(
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue