diff --git a/llama_stack/providers/remote/inference/centml/centml.py b/llama_stack/providers/remote/inference/centml/centml.py
index 02b4df475..767c6dc37 100644
--- a/llama_stack/providers/remote/inference/centml/centml.py
+++ b/llama_stack/providers/remote/inference/centml/centml.py
@@ -256,32 +256,6 @@ class CentMLInferenceAdapter(
         logcat.debug("inference", f"params to centml: {params}")
         return params
 
-    def _build_options(
-        self,
-        sampling_params: Optional[SamplingParams],
-        fmt: Optional[ResponseFormat],
-    ) -> dict:
-        """
-        Build temperature, max_tokens, top_p, etc., plus any response format data.
-        """
-        options = get_sampling_options(sampling_params)
-        options.setdefault("max_tokens", 512)
-
-        if fmt:
-            if fmt.type == ResponseFormatType.json_schema.value:
-                options["response_format"] = {
-                    "type": "json_object",
-                    "schema": fmt.json_schema,
-                }
-            elif fmt.type == ResponseFormatType.grammar.value:
-                raise NotImplementedError(
-                    "Grammar response format not supported yet"
-                )
-            else:
-                raise ValueError(f"Unknown response format {fmt.type}")
-
-        return options
-
     def _build_options(
         self,
         sampling_params: Optional[SamplingParams],
@@ -302,7 +276,7 @@ class CentMLInferenceAdapter(
             raise ValueError(f"Unknown response format {fmt.type}")
 
         if logprobs and logprobs.top_k:
-            options["logprobs"] = 1
+            options["logprobs"] = logprobs.top_k
 
         return options
 
diff --git a/llama_stack/templates/centml/run.yaml b/llama_stack/templates/centml/run.yaml
index 9008aa8cc..d5f599828 100644
--- a/llama_stack/templates/centml/run.yaml
+++ b/llama_stack/templates/centml/run.yaml
@@ -16,7 +16,7 @@ providers:
   - provider_id: centml
     provider_type: remote::centml
     config:
-      url: https://api.centml.com/openai/v1
+      url: https://api.centml.org/openai/v1
       api_key: "${env.CENTML_API_KEY}"
   - provider_id: sentence-transformers
     provider_type: inline::sentence-transformers
@@ -104,6 +104,12 @@ metadata_store:
   db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/centml}/registry.db
 models:
+- metadata: {}
+  model_id: meta-llama/Llama-3.2-3B-Instruct
+  provider_id: centml
+  provider_model_id: meta-llama/Llama-3.2-3B-Instruct
+  model_type: llm
+
 - metadata: {}
   model_id: meta-llama/Llama-3.3-70B-Instruct
   provider_id: centml