Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-24 03:13:56 +00:00)

Merge branch 'main' into openai-vector-store/qdrant

Commit 970d0f307f

338 changed files with 15301 additions and 15997 deletions
docs/_static/llama-stack-spec.html (vendored, 20 lines changed)

@@ -15078,22 +15078,6 @@
       "DPOAlignmentConfig": {
         "type": "object",
         "properties": {
-          "reward_scale": {
-            "type": "number",
-            "description": "Scaling factor for the reward signal"
-          },
-          "reward_clip": {
-            "type": "number",
-            "description": "Maximum absolute value for reward clipping"
-          },
-          "epsilon": {
-            "type": "number",
-            "description": "Small value added for numerical stability"
-          },
-          "gamma": {
-            "type": "number",
-            "description": "Discount factor for future rewards"
-          },
           "beta": {
             "type": "number",
             "description": "Temperature parameter for the DPO loss"
@@ -15106,10 +15090,6 @@
         },
         "additionalProperties": false,
         "required": [
-          "reward_scale",
-          "reward_clip",
-          "epsilon",
-          "gamma",
          "beta",
          "loss_type"
        ],
docs/_static/llama-stack-spec.yaml (vendored, 18 lines changed)

@@ -11163,20 +11163,6 @@ components:
     DPOAlignmentConfig:
       type: object
       properties:
-        reward_scale:
-          type: number
-          description: Scaling factor for the reward signal
-        reward_clip:
-          type: number
-          description: >-
-            Maximum absolute value for reward clipping
-        epsilon:
-          type: number
-          description: >-
-            Small value added for numerical stability
-        gamma:
-          type: number
-          description: Discount factor for future rewards
         beta:
           type: number
           description: Temperature parameter for the DPO loss
@@ -11186,10 +11172,6 @@ components:
       description: The type of loss function to use for DPO
       additionalProperties: false
       required:
-        - reward_scale
-        - reward_clip
-        - epsilon
-        - gamma
        - beta
        - loss_type
      title: DPOAlignmentConfig
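After this trim, only `beta` and `loss_type` remain required in `DPOAlignmentConfig`. Purely as an illustrative sketch (not taken from the spec itself), a payload matching the trimmed schema might look like the following, with values borrowed from the provider defaults that appear later in this diff (`dpo_beta = 0.1`, `dpo_loss_type = sigmoid`):

```python
# Hypothetical DPOAlignmentConfig payload after the change; only `beta` and
# `loss_type` are still required. The values are illustrative defaults, not
# prescribed by the spec.
dpo_alignment_config = {
    "beta": 0.1,             # temperature parameter for the DPO loss
    "loss_type": "sigmoid",  # e.g. one of: sigmoid, hinge, ipo, kto_pair
}
```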
@@ -165,7 +165,7 @@
 "# use this helper if needed to kill the server \n",
 "def kill_llama_stack_server():\n",
 " # Kill any existing llama stack server processes\n",
-" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
+" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
 ]
 },
 {
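Decoded from the notebook JSON above, the updated helper reads as follows; the `import os` line is assumed to come from an earlier notebook cell rather than this hunk:

```python
import os  # assumed to be imported earlier in the notebook


# use this helper if needed to kill the server
def kill_llama_stack_server():
    # Kill any existing llama stack server processes
    os.system(
        "ps aux | grep -v grep | grep llama_stack.core.server.server"
        " | awk '{print $2}' | xargs kill -9"
    )
```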
@@ -275,7 +275,7 @@
 "# use this helper if needed to kill the server \n",
 "def kill_llama_stack_server():\n",
 " # Kill any existing llama stack server processes\n",
-" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
+" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
 ]
 },
 {
@@ -265,7 +265,7 @@
 "# use this helper if needed to kill the server \n",
 "def kill_llama_stack_server():\n",
 " # Kill any existing llama stack server processes\n",
-" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
+" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
 ]
 },
 {
@@ -3216,19 +3216,19 @@
 "INFO:datasets:Duckdb version 1.1.3 available.\n",
 "INFO:datasets:TensorFlow version 2.18.0 available.\n",
 "INFO:datasets:JAX version 0.4.33 available.\n",
-"INFO:llama_stack.distribution.stack:Scoring_fns: basic::equality served by basic\n",
-"INFO:llama_stack.distribution.stack:Scoring_fns: basic::subset_of served by basic\n",
-"INFO:llama_stack.distribution.stack:Scoring_fns: basic::regex_parser_multiple_choice_answer served by basic\n",
-"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::factuality served by braintrust\n",
-"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-correctness served by braintrust\n",
-"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-relevancy served by braintrust\n",
-"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::answer-similarity served by braintrust\n",
-"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::faithfulness served by braintrust\n",
-"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-entity-recall served by braintrust\n",
-"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-precision served by braintrust\n",
-"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-recall served by braintrust\n",
-"INFO:llama_stack.distribution.stack:Scoring_fns: braintrust::context-relevancy served by braintrust\n",
-"INFO:llama_stack.distribution.stack:\n"
+"INFO:llama_stack.core.stack:Scoring_fns: basic::equality served by basic\n",
+"INFO:llama_stack.core.stack:Scoring_fns: basic::subset_of served by basic\n",
+"INFO:llama_stack.core.stack:Scoring_fns: basic::regex_parser_multiple_choice_answer served by basic\n",
+"INFO:llama_stack.core.stack:Scoring_fns: braintrust::factuality served by braintrust\n",
+"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-correctness served by braintrust\n",
+"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-relevancy served by braintrust\n",
+"INFO:llama_stack.core.stack:Scoring_fns: braintrust::answer-similarity served by braintrust\n",
+"INFO:llama_stack.core.stack:Scoring_fns: braintrust::faithfulness served by braintrust\n",
+"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-entity-recall served by braintrust\n",
+"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-precision served by braintrust\n",
+"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-recall served by braintrust\n",
+"INFO:llama_stack.core.stack:Scoring_fns: braintrust::context-relevancy served by braintrust\n",
+"INFO:llama_stack.core.stack:\n"
 ]
 },
 {
@@ -3448,7 +3448,7 @@
 "\n",
 "os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')\n",
 "\n",
-"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
+"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
 "client = LlamaStackAsLibraryClient(\"experimental-post-training\")\n",
 "_ = client.initialize()"
 ]
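The notebook hunks above stop at `client.initialize()`. For orientation only, a minimal sketch of what typically follows with the in-process library client; the `"starter"` distribution name is taken from elsewhere in this diff, and the `models.list()` call mirrors the regular `llama-stack-client` surface rather than anything shown in this commit:

```python
# Minimal sketch of using the in-process library client after initialize().
# Distribution name and the follow-up call are illustrative assumptions.
from llama_stack.core.library_client import LlamaStackAsLibraryClient

client = LlamaStackAsLibraryClient("starter")
_ = client.initialize()

# The library client exposes the same resource groups as the HTTP client,
# e.g. listing the models registered by the distribution:
print(client.models.list())
```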
@@ -48,7 +48,7 @@
 "outputs": [],
 "source": [
 "from llama_stack_client import LlamaStackClient, Agent\n",
-"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
+"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
 "from rich.pretty import pprint\n",
 "import json\n",
 "import uuid\n",
@@ -661,7 +661,7 @@
 "except ImportError:\n",
 " print(\"Not in Google Colab environment\")\n",
 "\n",
-"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
+"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
 "\n",
 "client = LlamaStackAsLibraryClient(\"together\")\n",
 "_ = client.initialize()"
@@ -35,7 +35,7 @@
 ],
 "source": [
 "from llama_stack_client import LlamaStackClient, Agent\n",
-"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
+"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
 "from rich.pretty import pprint\n",
 "import json\n",
 "import uuid\n",
@@ -194,7 +194,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
+"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
 "\n",
 "client = LlamaStackAsLibraryClient(\"nvidia\")\n",
 "client.initialize()"
@@ -56,7 +56,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
+"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
 "\n",
 "client = LlamaStackAsLibraryClient(\"nvidia\")\n",
 "client.initialize()"
@@ -56,7 +56,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
+"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
 "\n",
 "client = LlamaStackAsLibraryClient(\"nvidia\")\n",
 "client.initialize()"
@@ -56,7 +56,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"from llama_stack.distribution.library_client import LlamaStackAsLibraryClient\n",
+"from llama_stack.core.library_client import LlamaStackAsLibraryClient\n",
 "\n",
 "client = LlamaStackAsLibraryClient(\"nvidia\")\n",
 "client.initialize()"
@@ -1 +1 @@
-The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack/distribution/server/endpoints.py` using the `generate.py` utility.
+The RFC Specification (OpenAPI format) is generated from the set of API endpoints located in `llama_stack.core/server/endpoints.py` using the `generate.py` utility.
@@ -17,7 +17,7 @@ import fire
 import ruamel.yaml as yaml

 from llama_stack.apis.version import LLAMA_STACK_API_VERSION # noqa: E402
-from llama_stack.distribution.stack import LlamaStack # noqa: E402
+from llama_stack.core.stack import LlamaStack # noqa: E402

 from .pyopenapi.options import Options # noqa: E402
 from .pyopenapi.specification import Info, Server # noqa: E402
@@ -12,7 +12,7 @@ from typing import TextIO
 from typing import Any, List, Optional, Union, get_type_hints, get_origin, get_args

 from llama_stack.strong_typing.schema import object_to_json, StrictJsonType
-from llama_stack.distribution.resolver import api_protocol_map
+from llama_stack.core.resolver import api_protocol_map

 from .generator import Generator
 from .options import Options
@@ -73,7 +73,7 @@ The API is defined in the [YAML](_static/llama-stack-spec.yaml) and [HTML](_stat

 To prove out the API, we implemented a handful of use cases to make things more concrete. The [llama-stack-apps](https://github.com/meta-llama/llama-stack-apps) repository contains [6 different examples](https://github.com/meta-llama/llama-stack-apps/tree/main/examples) ranging from very basic to a multi turn agent.

-There is also a sample inference endpoint implementation in the [llama-stack](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/distribution/server/server.py) repository.
+There is also a sample inference endpoint implementation in the [llama-stack](https://github.com/meta-llama/llama-stack/blob/main/llama_stack.core/server/server.py) repository.

 ## Limitations
@@ -187,7 +187,7 @@
 "# use this helper if needed to kill the server \n",
 "def kill_llama_stack_server():\n",
 " # Kill any existing llama stack server processes\n",
-" os.system(\"ps aux | grep -v grep | grep llama_stack.distribution.server.server | awk '{print $2}' | xargs kill -9\")\n"
+" os.system(\"ps aux | grep -v grep | grep llama_stack.core.server.server | awk '{print $2}' | xargs kill -9\")\n"
 ]
 },
 {
@@ -355,7 +355,7 @@ server:
 8. Run the server:

 ```bash
-python -m llama_stack.distribution.server.server --yaml-config ~/.llama/run-byoa.yaml
+python -m llama_stack.core.server.server --yaml-config ~/.llama/run-byoa.yaml
 ```

 9. Test the API:
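Step 9's body falls outside this hunk. Purely as an illustrative way to poke at a running server (the base URL and port are assumptions based on the 8321 default used elsewhere in this diff, not on the BYOA guide itself):

```python
# Illustrative only: connect to a locally running Llama Stack server and list
# its models. Adjust the base URL/port to match your run config.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")
print(client.models.list())
```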
@@ -103,5 +103,5 @@ llama stack run together

 2. Start Streamlit UI
 ```bash
-uv run --with ".[ui]" streamlit run llama_stack/distribution/ui/app.py
+uv run --with ".[ui]" streamlit run llama_stack.core/ui/app.py
 ```
@@ -174,7 +174,7 @@ spec:
       - name: llama-stack
         image: localhost/llama-stack-run-k8s:latest
         imagePullPolicy: IfNotPresent
-        command: ["python", "-m", "llama_stack.distribution.server.server", "--config", "/app/config.yaml"]
+        command: ["python", "-m", "llama_stack.core.server.server", "--config", "/app/config.yaml"]
         ports:
           - containerPort: 5000
         volumeMounts:
@@ -59,7 +59,7 @@ Build a Llama stack container

 options:
   -h, --help            show this help message and exit
-  --config CONFIG       Path to a config file to use for the build. You can find example configs in llama_stack/distributions/**/build.yaml. If this argument is not provided, you will
+  --config CONFIG       Path to a config file to use for the build. You can find example configs in llama_stack.cores/**/build.yaml. If this argument is not provided, you will
                         be prompted to enter information interactively (default: None)
   --template TEMPLATE   Name of the example template config to use for build. You may use `llama stack build --list-templates` to check out the available templates (default: None)
   --list-templates      Show the available templates for building a Llama Stack distribution (default: False)
@@ -10,7 +10,7 @@ llama stack build --template starter --image-type venv
 ```

 ```python
-from llama_stack.distribution.library_client import LlamaStackAsLibraryClient
+from llama_stack.core.library_client import LlamaStackAsLibraryClient

 client = LlamaStackAsLibraryClient(
     "starter",
@@ -52,7 +52,7 @@ spec:
             value: "${SAFETY_MODEL}"
           - name: TAVILY_SEARCH_API_KEY
             value: "${TAVILY_SEARCH_API_KEY}"
-        command: ["python", "-m", "llama_stack.distribution.server.server", "--config", "/etc/config/stack_run_config.yaml", "--port", "8321"]
+        command: ["python", "-m", "llama_stack.core.server.server", "--config", "/etc/config/stack_run_config.yaml", "--port", "8321"]
         ports:
           - containerPort: 8321
         volumeMounts:
@@ -1,9 +1,4 @@
-# External Providers Guide
-
-Llama Stack supports external providers that live outside of the main codebase. This allows you to:
-- Create and maintain your own providers independently
-- Share providers with others without contributing to the main codebase
-- Keep provider-specific code separate from the core Llama Stack code
+# Creating External Providers

 ## Configuration

@@ -55,17 +50,6 @@ Llama Stack supports two types of external providers:
 1. **Remote Providers**: Providers that communicate with external services (e.g., cloud APIs)
 2. **Inline Providers**: Providers that run locally within the Llama Stack process

-## Known External Providers
-
-Here's a list of known external providers that you can use with Llama Stack:
-
-| Name | Description | API | Type | Repository |
-|------|-------------|-----|------|------------|
-| KubeFlow Training | Train models with KubeFlow | Post Training | Remote | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) |
-| KubeFlow Pipelines | Train models with KubeFlow Pipelines | Post Training | Inline **and** Remote | [llama-stack-provider-kfp-trainer](https://github.com/opendatahub-io/llama-stack-provider-kfp-trainer) |
-| RamaLama | Inference models with RamaLama | Inference | Remote | [ramalama-stack](https://github.com/containers/ramalama-stack) |
-| TrustyAI LM-Eval | Evaluate models with TrustyAI LM-Eval | Eval | Remote | [llama-stack-provider-lmeval](https://github.com/trustyai-explainability/llama-stack-provider-lmeval) |
-
 ### Remote Provider Specification

 Remote providers are used when you need to communicate with external services. Here's an example for a custom Ollama provider:
@@ -119,9 +103,9 @@ container_image: custom-vector-store:latest # optional
 - `provider_data_validator`: Optional validator for provider data
 - `container_image`: Optional container image to use instead of pip packages

-## Required Implementation
+## Required Fields

-## All Providers
+### All Providers

 All providers must contain a `get_provider_spec` function in their `provider` module. This is a standardized structure that Llama Stack expects and is necessary for getting things such as the config class. The `get_provider_spec` method returns a structure identical to the `adapter`. An example function may look like:
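The guide's own example body falls outside the captured hunks (only its closing lines appear below). Purely for orientation, a rough sketch of such a function; the import path, the `remote_provider_spec`/`AdapterSpec` helpers, and every field value here are assumptions, chosen to mirror the adapter fields (`adapter_type`, `pip_packages`, `config_class`, `module`) described in this guide:

```python
# Rough sketch only; names and values are illustrative assumptions, not the
# guide's own example.
from llama_stack.providers.datatypes import AdapterSpec, Api, remote_provider_spec


def get_provider_spec():
    # Returns a structure mirroring the `adapter` block of the provider YAML.
    return remote_provider_spec(
        api=Api.inference,
        adapter=AdapterSpec(
            adapter_type="custom_ollama",
            pip_packages=["ollama", "aiohttp"],
            config_class="llama_stack_provider_ollama.config.OllamaImplConfig",
            module="llama_stack_provider_ollama",
        ),
    )
```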
@@ -146,7 +130,7 @@ def get_provider_spec() -> ProviderSpec:
 )
 ```

-### Remote Providers
+#### Remote Providers

 Remote providers must expose a `get_adapter_impl()` function in their module that takes two arguments:
 1. `config`: An instance of the provider's config class
@@ -162,7 +146,7 @@ async def get_adapter_impl(
 return OllamaInferenceAdapter(config)
 ```

-### Inline Providers
+#### Inline Providers

 Inline providers must expose a `get_provider_impl()` function in their module that takes two arguments:
 1. `config`: An instance of the provider's config class
@@ -189,7 +173,40 @@ Version: 0.1.0
 Location: /path/to/venv/lib/python3.10/site-packages
 ```

-## Example using `external_providers_dir`: Custom Ollama Provider
+## Best Practices
+
+1. **Package Naming**: Use the prefix `llama-stack-provider-` for your provider packages to make them easily identifiable.
+
+2. **Version Management**: Keep your provider package versioned and compatible with the Llama Stack version you're using.
+
+3. **Dependencies**: Only include the minimum required dependencies in your provider package.
+
+4. **Documentation**: Include clear documentation in your provider package about:
+   - Installation requirements
+   - Configuration options
+   - Usage examples
+   - Any limitations or known issues
+
+5. **Testing**: Include tests in your provider package to ensure it works correctly with Llama Stack.
+   You can refer to the [integration tests
+   guide](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more
+   information. Execute the test for the Provider type you are developing.
+
+## Troubleshooting
+
+If your external provider isn't being loaded:
+
+1. Check that `module` points to a published pip package with a top level `provider` module including `get_provider_spec`.
+1. Check that the `external_providers_dir` path is correct and accessible.
+2. Verify that the YAML files are properly formatted.
+3. Ensure all required Python packages are installed.
+4. Check the Llama Stack server logs for any error messages - turn on debug logging to get more
+   information using `LLAMA_STACK_LOGGING=all=debug`.
+5. Verify that the provider package is installed in your Python environment if using `external_providers_dir`.
+
+## Examples
+
+### Example using `external_providers_dir`: Custom Ollama Provider

 Here's a complete example of creating and using a custom Ollama provider:
@@ -241,7 +258,7 @@ external_providers_dir: ~/.llama/providers.d/
 The provider will now be available in Llama Stack with the type `remote::custom_ollama`.


-## Example using `module`: ramalama-stack
+### Example using `module`: ramalama-stack

 [ramalama-stack](https://github.com/containers/ramalama-stack) is a recognized external provider that supports installation via module.
@@ -266,35 +283,4 @@ additional_pip_packages:

 No other steps are required other than `llama stack build` and `llama stack run`. The build process will use `module` to install all of the provider dependencies, retrieve the spec, etc.

+The provider will now be available in Llama Stack with the type `remote::ramalama`.
-
-## Best Practices
-
-1. **Package Naming**: Use the prefix `llama-stack-provider-` for your provider packages to make them easily identifiable.
-
-2. **Version Management**: Keep your provider package versioned and compatible with the Llama Stack version you're using.
-
-3. **Dependencies**: Only include the minimum required dependencies in your provider package.
-
-4. **Documentation**: Include clear documentation in your provider package about:
-   - Installation requirements
-   - Configuration options
-   - Usage examples
-   - Any limitations or known issues
-
-5. **Testing**: Include tests in your provider package to ensure it works correctly with Llama Stack.
-   You can refer to the [integration tests
-   guide](https://github.com/meta-llama/llama-stack/blob/main/tests/integration/README.md) for more
-   information. Execute the test for the Provider type you are developing.
-
-## Troubleshooting
-
-If your external provider isn't being loaded:
-
-1. Check that `module` points to a published pip package with a top level `provider` module including `get_provider_spec`.
-1. Check that the `external_providers_dir` path is correct and accessible.
-2. Verify that the YAML files are properly formatted.
-3. Ensure all required Python packages are installed.
-4. Check the Llama Stack server logs for any error messages - turn on debug logging to get more
-   information using `LLAMA_STACK_LOGGING=all=debug`.
-5. Verify that the provider package is installed in your Python environment if using `external_providers_dir`.
-The provider will now be available in Llama Stack with the type `remote::ramalama`.
docs/source/providers/external/external-providers-list.md (vendored, new file, 10 lines)

@@ -0,0 +1,10 @@
+# Known External Providers
+
+Here's a list of known external providers that you can use with Llama Stack:
+
+| Name | Description | API | Type | Repository |
+|------|-------------|-----|------|------------|
+| KubeFlow Training | Train models with KubeFlow | Post Training | Remote | [llama-stack-provider-kft](https://github.com/opendatahub-io/llama-stack-provider-kft) |
+| KubeFlow Pipelines | Train models with KubeFlow Pipelines | Post Training | Inline **and** Remote | [llama-stack-provider-kfp-trainer](https://github.com/opendatahub-io/llama-stack-provider-kfp-trainer) |
+| RamaLama | Inference models with RamaLama | Inference | Remote | [ramalama-stack](https://github.com/containers/ramalama-stack) |
+| TrustyAI LM-Eval | Evaluate models with TrustyAI LM-Eval | Eval | Remote | [llama-stack-provider-lmeval](https://github.com/trustyai-explainability/llama-stack-provider-lmeval) |
docs/source/providers/external/index.md (vendored, new file, 13 lines)

@@ -0,0 +1,13 @@
+# External Providers
+
+Llama Stack supports external providers that live outside of the main codebase. This allows you to:
+- Create and maintain your own providers independently
+- Share providers with others without contributing to the main codebase
+- Keep provider-specific code separate from the core Llama Stack code
+
+```{toctree}
+:maxdepth: 1
+
+external-providers-list
+external-providers-guide
+```
@@ -15,7 +15,7 @@ Importantly, Llama Stack always strives to provide at least one fully inline pro
 ```{toctree}
 :maxdepth: 1

-external
+external/index
 openai
 inference/index
 agents/index
@@ -24,6 +24,10 @@ HuggingFace-based post-training provider for fine-tuning models using the Huggin
 | `weight_decay` | `<class 'float'>` | No | 0.01 | |
 | `dataloader_num_workers` | `<class 'int'>` | No | 4 | |
 | `dataloader_pin_memory` | `<class 'bool'>` | No | True | |
+| `dpo_beta` | `<class 'float'>` | No | 0.1 | |
+| `use_reference_model` | `<class 'bool'>` | No | True | |
+| `dpo_loss_type` | `Literal['sigmoid', 'hinge', 'ipo', 'kto_pair'` | No | sigmoid | |
+| `dpo_output_dir` | `<class 'str'>` | No | ./checkpoints/dpo | |

 ## Sample Configuration
@@ -66,7 +66,7 @@
 "from pydantic import BaseModel\n",
 "from termcolor import cprint\n",
 "\n",
-"from llama_stack.distribution.datatypes import RemoteProviderConfig\n",
+"from llama_stack.core.datatypes import RemoteProviderConfig\n",
 "from llama_stack.apis.safety import Safety\n",
 "from llama_stack_client import LlamaStackClient\n",
 "\n",