Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-06-29 03:14:19 +00:00
Enable remote::vllm (#384)
* Enable remote::vllm
* Kill the giant list of hard coded models
This commit is contained in:
parent 093c9f1987, commit b10e9f46bb

5 changed files with 80 additions and 53 deletions
@@ -11,12 +11,16 @@ from pydantic import BaseModel, Field


 @json_schema_type
-class VLLMImplConfig(BaseModel):
+class VLLMInferenceAdapterConfig(BaseModel):
     url: Optional[str] = Field(
         default=None,
         description="The URL for the vLLM model serving endpoint",
     )
+    max_tokens: int = Field(
+        default=4096,
+        description="Maximum number of tokens to generate.",
+    )
     api_token: Optional[str] = Field(
-        default=None,
+        default="fake",
         description="The API token",
     )
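For context, here is a minimal sketch of how the renamed adapter config might be instantiated against a locally running vLLM server. The import path and the endpoint URL are assumptions for illustration, not taken from this diff.

    # Hypothetical usage sketch; the module path is an assumption,
    # not confirmed by this commit.
    from llama_stack.providers.adapters.inference.vllm import VLLMInferenceAdapterConfig

    # Point the adapter at a vLLM OpenAI-compatible serving endpoint.
    config = VLLMInferenceAdapterConfig(
        url="http://localhost:8000/v1",  # vLLM model serving endpoint (assumed local)
        max_tokens=4096,                 # generation cap carried in the config
        api_token="fake",                # placeholder token, matching the new default
    )

    print(config)

The "fake" default for api_token lets the adapter talk to servers that do not enforce authentication without requiring the user to supply a token.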