diff --git a/docs/source/providers/inference/remote_llamacpp.md b/docs/source/providers/inference/remote_llamacpp.md index d17b5adf2..07ac0769e 100644 --- a/docs/source/providers/inference/remote_llamacpp.md +++ b/docs/source/providers/inference/remote_llamacpp.md @@ -5,12 +5,12 @@ | Field | Type | Required | Default | Description | |-------|------|----------|---------|-------------| | `api_key` | `str \| None` | No | | The llama.cpp server API key (optional for local servers) | -| `openai_compat_api_base` | `` | No | http://localhost:8080/v1 | The URL for the llama.cpp server with OpenAI-compatible API | +| `openai_compat_api_base` | `` | No | http://localhost:8080 | The URL for the llama.cpp server with OpenAI-compatible API | ## Sample Configuration ```yaml -openai_compat_api_base: ${env.LLAMACPP_URL:http://localhost:8080}/v1 +openai_compat_api_base: ${env.LLAMACPP_URL:=http://localhost:8080} api_key: ${env.LLAMACPP_API_KEY:=} ``` diff --git a/llama_stack/providers/remote/inference/llamacpp/config.py b/llama_stack/providers/remote/inference/llamacpp/config.py index 4a16cb17d..3aff56df6 100644 --- a/llama_stack/providers/remote/inference/llamacpp/config.py +++ b/llama_stack/providers/remote/inference/llamacpp/config.py @@ -26,13 +26,13 @@ class LlamaCppImplConfig(BaseModel): ) openai_compat_api_base: str = Field( - default="http://localhost:8080/v1", + default="http://localhost:8080", description="The URL for the llama.cpp server with OpenAI-compatible API", ) @classmethod def sample_run_config(cls, api_key: str = "${env.LLAMACPP_API_KEY:=}") -> dict[str, Any]: return { - "openai_compat_api_base": "${env.LLAMACPP_URL:http://localhost:8080}/v1", + "openai_compat_api_base": "${env.LLAMACPP_URL:=http://localhost:8080}", "api_key": api_key, } diff --git a/llama_stack/templates/llamacpp/run-with-safety.yaml b/llama_stack/templates/llamacpp/run-with-safety.yaml index a83db1332..b225fe4da 100644 --- a/llama_stack/templates/llamacpp/run-with-safety.yaml +++ b/llama_stack/templates/llamacpp/run-with-safety.yaml @@ -16,7 +16,7 @@ providers: - provider_id: llamacpp provider_type: remote::llamacpp config: - openai_compat_api_base: ${env.LLAMACPP_URL:http://localhost:8080}/v1 + openai_compat_api_base: ${env.LLAMACPP_URL:=http://localhost:8080} api_key: ${env.LLAMACPP_API_KEY:=} - provider_id: sentence-transformers provider_type: inline::sentence-transformers diff --git a/llama_stack/templates/llamacpp/run.yaml b/llama_stack/templates/llamacpp/run.yaml index c9f7b1b8c..d10e2dd66 100644 --- a/llama_stack/templates/llamacpp/run.yaml +++ b/llama_stack/templates/llamacpp/run.yaml @@ -16,7 +16,7 @@ providers: - provider_id: llamacpp provider_type: remote::llamacpp config: - openai_compat_api_base: ${env.LLAMACPP_URL:http://localhost:8080}/v1 + openai_compat_api_base: ${env.LLAMACPP_URL:=http://localhost:8080} api_key: ${env.LLAMACPP_API_KEY:=} - provider_id: sentence-transformers provider_type: inline::sentence-transformers diff --git a/llama_stack/templates/starter/run.yaml b/llama_stack/templates/starter/run.yaml index c21b1e5bf..ca9ce9566 100644 --- a/llama_stack/templates/starter/run.yaml +++ b/llama_stack/templates/starter/run.yaml @@ -133,7 +133,7 @@ providers: - provider_id: ${env.ENABLE_LLAMACPP:=__disabled__} provider_type: remote::llamacpp config: - openai_compat_api_base: ${env.LLAMACPP_URL:http://localhost:8080}/v1 + openai_compat_api_base: ${env.LLAMACPP_URL:=http://localhost:8080} api_key: ${env.LLAMACPP_API_KEY:=} - provider_id: ${env.ENABLE_SENTENCE_TRANSFORMERS:=sentence-transformers} provider_type: inline::sentence-transformers