mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-08 13:00:52 +00:00
chore: Revert "fix: fix nvidia provider (#3716)"
This reverts commit c940fe7938
.
This commit is contained in:
parent
c940fe7938
commit
9e61a4ab8c
1 changed files with 7 additions and 21 deletions
|
@ -13,7 +13,6 @@ from llama_stack.apis.inference import (
|
|||
OpenAIEmbeddingUsage,
|
||||
)
|
||||
from llama_stack.log import get_logger
|
||||
from llama_stack.providers.utils.inference.model_registry import ModelRegistryHelper
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
|
||||
from . import NVIDIAConfig
|
||||
|
@ -22,7 +21,9 @@ from .utils import _is_nvidia_hosted
|
|||
logger = get_logger(name=__name__, category="inference::nvidia")
|
||||
|
||||
|
||||
class NVIDIAInferenceAdapter(OpenAIMixin, ModelRegistryHelper):
|
||||
class NVIDIAInferenceAdapter(OpenAIMixin):
|
||||
config: NVIDIAConfig
|
||||
|
||||
"""
|
||||
NVIDIA Inference Adapter for Llama Stack.
|
||||
|
||||
|
@ -36,27 +37,12 @@ class NVIDIAInferenceAdapter(OpenAIMixin, ModelRegistryHelper):
|
|||
- ModelRegistryHelper.check_model_availability() just returns False and shows a warning
|
||||
"""
|
||||
|
||||
def __init__(self, config: NVIDIAConfig) -> None:
|
||||
"""Initialize the NVIDIA inference adapter with configuration."""
|
||||
# Initialize ModelRegistryHelper with empty model entries since NVIDIA uses dynamic model discovery
|
||||
ModelRegistryHelper.__init__(self, model_entries=[], allowed_models=config.allowed_models)
|
||||
self.config = config
|
||||
|
||||
# source: https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
|
||||
embedding_model_metadata: dict[str, dict[str, int]] = {
|
||||
"nvidia/llama-3.2-nv-embedqa-1b-v2": {
|
||||
"embedding_dimension": 2048,
|
||||
"context_length": 8192,
|
||||
},
|
||||
"nvidia/llama-3.2-nv-embedqa-1b-v2": {"embedding_dimension": 2048, "context_length": 8192},
|
||||
"nvidia/nv-embedqa-e5-v5": {"embedding_dimension": 512, "context_length": 1024},
|
||||
"nvidia/nv-embedqa-mistral-7b-v2": {
|
||||
"embedding_dimension": 512,
|
||||
"context_length": 4096,
|
||||
},
|
||||
"snowflake/arctic-embed-l": {
|
||||
"embedding_dimension": 512,
|
||||
"context_length": 1024,
|
||||
},
|
||||
"nvidia/nv-embedqa-mistral-7b-v2": {"embedding_dimension": 512, "context_length": 4096},
|
||||
"snowflake/arctic-embed-l": {"embedding_dimension": 512, "context_length": 1024},
|
||||
}
|
||||
|
||||
async def initialize(self) -> None:
|
||||
|
@ -109,7 +95,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin, ModelRegistryHelper):
|
|||
response = await self.client.embeddings.create(
|
||||
model=await self._get_provider_model_id(model),
|
||||
input=input,
|
||||
encoding_format=(encoding_format if encoding_format is not None else NOT_GIVEN),
|
||||
encoding_format=encoding_format if encoding_format is not None else NOT_GIVEN,
|
||||
dimensions=dimensions if dimensions is not None else NOT_GIVEN,
|
||||
user=user if user is not None else NOT_GIVEN,
|
||||
extra_body=extra_body,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue