init: first remote llamacpp implementation

2025-12-23 10:12:30 +00:00 · 2025-07-12 16:28:08 -07:00 · 2025-07-12 16:28:08 -07:00 · ec73d0d55b
commit ec73d0d55b
parent 8374d4cefd
9 changed files with 675 additions and 0 deletions
--- a/llama_stack/providers/remote/inference/llamacpp/llamacpp.py
+++ b/llama_stack/providers/remote/inference/llamacpp/llamacpp.py
@ -0,0 +1,36 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from llama_stack.apis.models.models import Model
+from llama_stack.providers.remote.inference.llamacpp.config import LlamaCppImplConfig
+from llama_stack.providers.utils.inference.litellm_openai_mixin import (
+    LiteLLMOpenAIMixin,
+)
+
+
+# Inference adapter for llama.cpp, implemented on top of LiteLLMOpenAIMixin.
+# It registers no predefined model entries and accepts any model passed to
+# register_model() without validation.
+class LlamaCppInferenceAdapter(LiteLLMOpenAIMixin):
+    # NOTE(review): the class annotates `_config`, but __init__ below assigns
+    # `self.config` (no underscore). Confirm which attribute name the mixin
+    # and callers actually read.
+    _config: LlamaCppImplConfig
+
+    # Build the adapter from its provider config.
+    #
+    # config: supplies `api_key` and `openai_compat_api_base`, both forwarded
+    #         to the mixin; the config object itself is kept on `self.config`.
+    def __init__(self, config: LlamaCppImplConfig):
+        # The mixin initializer is invoked explicitly on the base class
+        # rather than through super().
+        LiteLLMOpenAIMixin.__init__(
+            self,
+            model_entries=[],  # llama.cpp can work with any GGUF model
+            api_key_from_config=config.api_key,
+            # Presumably the name of the provider-data field that can carry a
+            # per-request API key -- verify against LiteLLMOpenAIMixin.
+            provider_data_api_key_field="llamacpp_api_key",
+            openai_compat_api_base=config.openai_compat_api_base,
+        )
+        self.config = config
+
+    # Accept the given model as-is and return the same object unchanged.
+    # NOTE(review): this presumably replaces a stricter register_model in the
+    # mixin that checks against `model_entries` -- confirm.
+    async def register_model(self, model: Model) -> Model:
+        # llama.cpp can work with any GGUF model, so we accept any model name
+        # without validation against a predefined list
+        return model
+
+    # Delegate to the parent class's initialize(); no llama.cpp-specific
+    # setup is performed here.
+    async def initialize(self):
+        await super().initialize()
+
+    # Delegate to the parent class's shutdown(); no llama.cpp-specific
+    # teardown is performed here.
+    async def shutdown(self):
+        await super().shutdown()