Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-06-29 03:14:19 +00:00
Enable remote::vllm (#384)
* Enable remote::vllm
* Kill the giant list of hard coded models
This commit is contained in:
parent 093c9f1987, commit b10e9f46bb

5 changed files with 80 additions and 53 deletions
@@ -11,12 +11,16 @@ from pydantic import BaseModel, Field


 @json_schema_type
-class VLLMImplConfig(BaseModel):
+class VLLMInferenceAdapterConfig(BaseModel):
     url: Optional[str] = Field(
         default=None,
         description="The URL for the vLLM model serving endpoint",
     )
+    max_tokens: int = Field(
+        default=4096,
+        description="Maximum number of tokens to generate.",
+    )
     api_token: Optional[str] = Field(
-        default=None,
+        default="fake",
         description="The API token",
     )
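For context, here is a minimal sketch of how the renamed adapter config might be instantiated against a locally running vLLM server. The import path and the endpoint URL are assumptions for illustration, not taken from this diff.

    # Hypothetical usage sketch; the module path is an assumption,
    # not confirmed by this commit.
    from llama_stack.providers.adapters.inference.vllm import VLLMInferenceAdapterConfig

    # Point the adapter at a vLLM OpenAI-compatible serving endpoint.
    config = VLLMInferenceAdapterConfig(
        url="http://localhost:8000/v1",  # vLLM model serving endpoint (assumed local)
        max_tokens=4096,                 # generation cap carried in the config
        api_token="fake",                # placeholder token, matching the new default
    )

    print(config)

The "fake" default for api_token lets the adapter talk to servers that do not enforce authentication without requiring the user to supply a token.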