mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-12 04:00:42 +00:00
updated with vllm based values
rh-pre-commit.version: 2.3.2 rh-pre-commit.check-secrets: ENABLED
This commit is contained in:
parent
a701f68bd7
commit
17e74251e2
11 changed files with 551 additions and 102 deletions
|
|
@@ -41,7 +41,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
provider_type="inline::sentence-transformers",
|
||||
# CrossEncoder depends on torchao.quantization
|
||||
pip_packages=[
|
||||
- "torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu",
|
||||
+ "torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cu130",
|
||||
"sentence-transformers --no-deps",
|
||||
# required by some SentenceTransformers architectures for tensor rearrange/merge ops
|
||||
"einops",
|
||||
|
|
|
|||
|
|
@@ -32,7 +32,7 @@ def available_providers() -> list[ProviderSpec]:
|
|||
"provider_type": "inline::torchtune-cpu",
|
||||
"pip_packages": (
|
||||
cast(list[str], torchtune_def["pip_packages"])
|
||||
- + ["torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu"]
|
||||
+ + ["torch torchtune>=0.5.0 torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cu130"]
|
||||
),
|
||||
},
|
||||
),
|
||||
|
|
|
|||
|
|
@@ -98,7 +98,7 @@ class VLLMInferenceAdapter(OpenAIMixin):
|
|||
params = params.model_copy()
|
||||
|
||||
# Apply vLLM-specific defaults
|
||||
- if params.max_tokens is None and self.config.max_tokens:
|
||||
+ if (params.max_tokens is None or params.max_tokens == 0) and self.config.max_tokens:
|
||||
params.max_tokens = self.config.max_tokens
|
||||
|
||||
# This is to be consistent with OpenAI API and support vLLM <= v0.6.3
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue