diff --git a/docs/docs/providers/inference/remote_nvidia.mdx b/docs/docs/providers/inference/remote_nvidia.mdx index b4e04176c..657dcf3bc 100644 --- a/docs/docs/providers/inference/remote_nvidia.mdx +++ b/docs/docs/providers/inference/remote_nvidia.mdx @@ -16,7 +16,7 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services. |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | The NVIDIA API key, only needed if using the hosted service | | `url` | `` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM | | `timeout` | `` | No | 60 | Timeout for the HTTP requests | | `append_api_version` | `` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. 
| diff --git a/llama_stack/providers/remote/inference/cerebras/cerebras.py b/llama_stack/providers/remote/inference/cerebras/cerebras.py index 291336f86..21a9aa04c 100644 --- a/llama_stack/providers/remote/inference/cerebras/cerebras.py +++ b/llama_stack/providers/remote/inference/cerebras/cerebras.py @@ -6,7 +6,10 @@ from urllib.parse import urljoin -from llama_stack.apis.inference import OpenAIEmbeddingsResponse +from llama_stack.apis.inference import ( + OpenAIEmbeddingsRequestWithExtraBody, + OpenAIEmbeddingsResponse, +) from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from .config import CerebrasImplConfig @@ -18,17 +21,15 @@ class CerebrasInferenceAdapter(OpenAIMixin): provider_data_api_key_field: str = "cerebras_api_key" def get_api_key(self) -> str: - return self.config.api_key.get_secret_value() + if self.config.auth_credential is None: + raise ValueError("Cerebras API key is required") + return self.config.auth_credential.get_secret_value() def get_base_url(self) -> str: return urljoin(self.config.base_url, "v1") async def openai_embeddings( self, - model: str, - input: str | list[str], - encoding_format: str | None = "float", - dimensions: int | None = None, - user: str | None = None, + params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: raise NotImplementedError() diff --git a/llama_stack/providers/remote/inference/cerebras/config.py b/llama_stack/providers/remote/inference/cerebras/config.py index dbab60a4b..9ba773724 100644 --- a/llama_stack/providers/remote/inference/cerebras/config.py +++ b/llama_stack/providers/remote/inference/cerebras/config.py @@ -7,7 +7,7 @@ import os from typing import Any -from pydantic import BaseModel, Field, SecretStr +from pydantic import BaseModel, Field from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.schema_utils import json_schema_type @@ -28,10 +28,6 @@ class 
CerebrasImplConfig(RemoteInferenceProviderConfig): default=os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL), description="Base URL for the Cerebras API", ) - api_key: SecretStr = Field( - default=SecretStr(os.environ.get("CEREBRAS_API_KEY")), # type: ignore[arg-type] - description="Cerebras API Key", - ) @classmethod def sample_run_config(cls, api_key: str = "${env.CEREBRAS_API_KEY:=}", **kwargs) -> dict[str, Any]: diff --git a/pyproject.toml b/pyproject.toml index 19c21d8eb..d2352a655 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,6 @@ classifiers = [ ] dependencies = [ "aiohttp", - "databricks-sdk", "fastapi>=0.115.0,<1.0", # server "fire", # for MCP in LLS client "httpx",