diff --git a/llama_stack/providers/remote/inference/centml/centml.py b/llama_stack/providers/remote/inference/centml/centml.py
index c3798837b..0ed31aacd 100644
--- a/llama_stack/providers/remote/inference/centml/centml.py
+++ b/llama_stack/providers/remote/inference/centml/centml.py
@@ -17,6 +17,7 @@ from llama_stack.apis.inference import (
     ChatCompletionRequest,
     ChatCompletionResponse,
     CompletionRequest,
+    CompletionResponse,
     EmbeddingsResponse,
     Inference,
     LogProbConfig,
@@ -25,6 +26,7 @@ from llama_stack.apis.inference import (
     ResponseFormatType,
     SamplingParams,
     ToolChoice,
+    ToolConfig,
     ToolDefinition,
     ToolPromptFormat,
 )
@@ -42,6 +44,7 @@ from llama_stack.providers.utils.inference.openai_compat import (
     process_completion_stream_response,
 )
 from llama_stack.providers.utils.inference.prompt_adapter import (
+    chat_completion_request_to_prompt,
     completion_request_to_prompt,
     content_has_media,
     interleaved_content_as_str,
@@ -176,6 +179,7 @@ class CentMLInferenceAdapter(
         response_format: Optional[ResponseFormat] = None,
         stream: Optional[bool] = False,
         logprobs: Optional[LogProbConfig] = None,
+        tool_config: Optional[ToolConfig] = None,
     ) -> AsyncGenerator:
         """
         For "chat completion" style requests.
diff --git a/llama_stack/templates/centml/build.yaml b/llama_stack/templates/centml/build.yaml
index 489b9f8fd..aa8abd202 100644
--- a/llama_stack/templates/centml/build.yaml
+++ b/llama_stack/templates/centml/build.yaml
@@ -5,7 +5,7 @@ distribution_spec:
   providers:
     inference:
     - remote::centml
-    memory:
+    vector_io:
     - inline::faiss
     - remote::chromadb
     - remote::pgvector
@@ -28,5 +28,6 @@ distribution_spec:
     - remote::brave-search
     - remote::tavily-search
     - inline::code-interpreter
-    - inline::memory-runtime
+    - inline::rag-runtime
+    - remote::model-context-protocol
 image_type: conda
diff --git a/llama_stack/templates/centml/run.yaml b/llama_stack/templates/centml/run.yaml
index 414dd9065..9008aa8cc 100644
--- a/llama_stack/templates/centml/run.yaml
+++ b/llama_stack/templates/centml/run.yaml
@@ -6,11 +6,11 @@ apis:
 - datasetio
 - eval
 - inference
-- memory
 - safety
 - scoring
 - telemetry
 - tool_runtime
+- vector_io
 providers:
   inference:
   - provider_id: centml
@@ -22,7 +22,7 @@ providers:
     provider_type: inline::sentence-transformers
     config: {}
 
-  memory:
+  vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
     config:
@@ -92,8 +92,11 @@
   - provider_id: code-interpreter
     provider_type: inline::code-interpreter
     config: {}
-  - provider_id: memory-runtime
-    provider_type: inline::memory-runtime
+  - provider_id: rag-runtime
+    provider_type: inline::rag-runtime
+    config: {}
+  - provider_id: model-context-protocol
+    provider_type: remote::model-context-protocol
     config: {}
 
 metadata_store:
@@ -116,14 +119,14 @@ models:
 
 shields:
 - shield_id: meta-llama/Llama-Guard-3-8B
-memory_banks: []
+vector_dbs: []
 datasets: []
 scoring_fns: []
 eval_tasks: []
 tool_groups:
 - toolgroup_id: builtin::websearch
   provider_id: tavily-search
-- toolgroup_id: builtin::memory
-  provider_id: memory-runtime
+- toolgroup_id: builtin::rag
+  provider_id: rag-runtime
 - toolgroup_id: builtin::code_interpreter
   provider_id: code-interpreter
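
Note: below is a minimal sketch of how the tool_config parameter added to CentMLInferenceAdapter.chat_completion might be exercised. Apart from ToolConfig and tool_config, which this diff introduces, everything here (the model id, UserMessage, the call shape, and the response attributes) is an assumption based on the Llama Stack Inference API of this era, not something this patch itself guarantees.

from llama_stack.apis.inference import ToolChoice, ToolConfig, UserMessage

async def demo(adapter) -> None:
    # Sketch only: "adapter" is assumed to be an already-initialized
    # CentMLInferenceAdapter registered with a served model.
    response = await adapter.chat_completion(
        model_id="meta-llama/Llama-3.1-8B-Instruct",  # assumed model id
        messages=[UserMessage(content="Say hello.")],
        stream=False,
        # New in this patch: per-request tool behavior travels in tool_config
        # instead of only the legacy tool_choice/tool_prompt_format arguments.
        tool_config=ToolConfig(tool_choice=ToolChoice.auto),
    )
    print(response.completion_message.content)

Passing tool_config through the adapter keeps it aligned with the rest of the Inference providers, which accept the same optional parameter and fall back to default tool behavior when it is omitted.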