diff --git a/llama_stack/providers/remote/inference/centml/centml.py b/llama_stack/providers/remote/inference/centml/centml.py
index c5b400d55..c7e4f2d9e 100644
--- a/llama_stack/providers/remote/inference/centml/centml.py
+++ b/llama_stack/providers/remote/inference/centml/centml.py
@@ -8,7 +8,6 @@ from typing import AsyncGenerator, List, Optional, Union

 from openai import OpenAI

-from llama_stack import logcat
 from llama_models.datatypes import CoreModelId
 from llama_models.llama3.api.chat_format import ChatFormat
 from llama_models.llama3.api.tokenizer import Tokenizer
@@ -61,6 +60,10 @@ MODEL_ALIASES = [
         "meta-llama/Llama-3.2-3B-Instruct",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
+    build_model_entry(
+        "meta-llama/Llama-3.3-70B-Instruct",
+        CoreModelId.llama3_3_70b_instruct.value,
+    ),
 ]


@@ -254,7 +257,6 @@ class CentMLInferenceAdapter(ModelRegistryHelper, Inference,
             request.stream,
             **self._build_options(request.sampling_params, request.logprobs, request.response_format),
         }
-        logcat.debug("inference", f"params to centml: {params}")
         return params

     def _build_options(
diff --git a/llama_stack/providers/remote/inference/centml/config.py b/llama_stack/providers/remote/inference/centml/config.py
index 7bf101b23..bc9711bdb 100644
--- a/llama_stack/providers/remote/inference/centml/config.py
+++ b/llama_stack/providers/remote/inference/centml/config.py
@@ -13,7 +13,7 @@ from pydantic import BaseModel, Field, SecretStr
 @json_schema_type
 class CentMLImplConfig(BaseModel):
     url: str = Field(
-        default="https://api.centml.org/openai/v1",
+        default="https://api.centml.com/openai/v1",
         description="The CentML API server URL",
     )
     api_key: Optional[SecretStr] = Field(
@@ -24,6 +24,6 @@
     @classmethod
     def sample_run_config(cls, **kwargs) -> Dict[str, Any]:
         return {
-            "url": "https://api.centml.org/openai/v1",
+            "url": "https://api.centml.com/openai/v1",
             "api_key": "${env.CENTML_API_KEY}",
         }
diff --git a/llama_stack/templates/centml/run.yaml b/llama_stack/templates/centml/run.yaml
index d5f599828..db56ad33e 100644
--- a/llama_stack/templates/centml/run.yaml
+++ b/llama_stack/templates/centml/run.yaml
@@ -16,7 +16,7 @@ providers:
   - provider_id: centml
     provider_type: remote::centml
     config:
-      url: https://api.centml.org/openai/v1
+      url: https://api.centml.com/openai/v1
       api_key: "${env.CENTML_API_KEY}"
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
diff --git a/tests/integration/fixtures/common.py b/tests/integration/fixtures/common.py
index e410039e7..0fc73297d 100644
--- a/tests/integration/fixtures/common.py
+++ b/tests/integration/fixtures/common.py
@@ -34,6 +34,7 @@ def provider_data():
         "GEMINI_API_KEY": "gemini_api_key",
         "OPENAI_API_KEY": "openai_api_key",
         "TOGETHER_API_KEY": "together_api_key",
+        "CENTML_API_KEY": "centml_api_key",
         "ANTHROPIC_API_KEY": "anthropic_api_key",
         "GROQ_API_KEY": "groq_api_key",
         "WOLFRAM_ALPHA_API_KEY": "wolfram_alpha_api_key",
diff --git a/tests/integration/inference/test_text_inference.py b/tests/integration/inference/test_text_inference.py
index 7e3e14dbc..af5681eb5 100644
--- a/tests/integration/inference/test_text_inference.py
+++ b/tests/integration/inference/test_text_inference.py
@@ -12,7 +12,7 @@ from llama_stack.models.llama.sku_list import resolve_model

 from ..test_cases.test_case import TestCase

-PROVIDER_LOGPROBS_TOP_K = {"remote::together", "remote::fireworks", "remote::vllm"}
+PROVIDER_LOGPROBS_TOP_K = {"remote::together", "remote::fireworks", "remote::vllm", "remote::centml"}


 def skip_if_model_doesnt_support_completion(client_with_models, model_id):