init: first remote llamacpp implementation

2025-12-24 05:53:55 +00:00 · 2025-07-12 16:28:08 -07:00 · 2025-07-12 16:28:08 -07:00 · ec73d0d55b
commit ec73d0d55b
parent 8374d4cefd
9 changed files with 675 additions and 0 deletions
--- a/llama_stack/providers/remote/inference/llamacpp/__init__.py
+++ b/llama_stack/providers/remote/inference/llamacpp/__init__.py
@@ -0,0 +1,17 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.apis.inference import InferenceProvider
+
+from .config import LlamaCppImplConfig
+
+
+async def get_adapter_impl(config: LlamaCppImplConfig, _deps) -> InferenceProvider:
+    """Build the llama.cpp inference adapter for ``config``; ``_deps`` is unused."""
+    # Deferred import: the adapter module loads only when this factory is called.
+    from .llamacpp import LlamaCppInferenceAdapter
+
+    return LlamaCppInferenceAdapter(config)
--- a/llama_stack/providers/remote/inference/llamacpp/config.py
+++ b/llama_stack/providers/remote/inference/llamacpp/config.py
@@ -0,0 +1,40 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from typing import Any, Union
+
+from llama_stack.schema_utils import json_schema_type
+
+from pydantic import BaseModel, Field
+
+
+class LlamaCppProviderDataValidator(BaseModel):
+    # Optional API key; stays None unless a value is supplied.
+    llamacpp_api_key: Union[str, None] = Field(
+        None, description="API key for llama.cpp server (optional for local servers)"
+    )
+
+
+@json_schema_type
+class LlamaCppImplConfig(BaseModel):
+    api_key: Union[str, None] = Field(
+        None, description="The llama.cpp server API key (optional for local servers)"
+    )
+
+    openai_compat_api_base: str = Field(
+        "http://localhost:8080/v1",
+        description="The URL for the llama.cpp server with OpenAI-compatible API",
+    )
+
+    @classmethod
+    def sample_run_config(
+        cls, api_key: str = "${env.LLAMACPP_API_KEY:}"
+    ) -> dict[str, Any]:
+        """Return a sample run config; ``${env...}`` placeholders stay unexpanded."""
+        return {
+            "openai_compat_api_base": "${env.LLAMACPP_URL:http://localhost:8080}/v1",
+            "api_key": api_key,
+        }
--- a/llama_stack/providers/remote/inference/llamacpp/llamacpp.py
+++ b/llama_stack/providers/remote/inference/llamacpp/llamacpp.py
@@ -0,0 +1,36 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.apis.models.models import Model
+from llama_stack.providers.remote.inference.llamacpp.config import LlamaCppImplConfig
+from llama_stack.providers.utils.inference.litellm_openai_mixin import (
+    LiteLLMOpenAIMixin,
+)
+
+
+class LlamaCppInferenceAdapter(LiteLLMOpenAIMixin):
+    """Inference adapter that reaches a llama.cpp server via LiteLLMOpenAIMixin."""
+
+    config: LlamaCppImplConfig
+
+    def __init__(self, config: LlamaCppImplConfig):
+        super().__init__(
+            model_entries=[],  # llama.cpp can work with any GGUF model
+            api_key_from_config=config.api_key,
+            provider_data_api_key_field="llamacpp_api_key",
+            openai_compat_api_base=config.openai_compat_api_base,
+        )
+        self.config = config
+
+    async def register_model(self, model: Model) -> Model:
+        """Return ``model`` unchanged; names are not checked against a fixed list."""
+        return model
+
+    async def initialize(self):
+        await super().initialize()
+
+    async def shutdown(self):
+        await super().shutdown()