Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-06-28 19:04:19 +00:00)

Commit 994732e2e0 (parent b10e9f46bb): impls -> inline, adapters -> remote (#381)

169 changed files with 106 additions and 105 deletions
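The rename is essentially mechanical: provider modules under `llama_stack.providers.impls.*` move to `llama_stack.providers.inline.*`, and adapters under `llama_stack.providers.adapters.*` move to `llama_stack.providers.remote.*`. As a minimal illustration, one import touched by this commit (both lines are taken verbatim from a hunk below):

```python
# Old layout: inline providers lived under "impls"
from llama_stack.providers.impls.meta_reference.memory.faiss import FaissMemoryImpl

# New layout: the same provider now lives under "inline" ("adapters" becomes "remote")
from llama_stack.providers.inline.meta_reference.memory.faiss import FaissMemoryImpl
```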
.gitmodules (vendored): 2 changes

@@ -1,3 +1,3 @@
 [submodule "llama_stack/providers/impls/ios/inference/executorch"]
-    path = llama_stack/providers/impls/ios/inference/executorch
+    path = llama_stack/providers/inline/ios/inference/executorch
     url = https://github.com/pytorch/executorch
@@ -6,8 +6,8 @@ This guide contains references to walk you through adding a new API provider.
 1. First, decide which API your provider falls into (e.g. Inference, Safety, Agents, Memory).
 2. Decide whether your provider is a remote provider, or inline implmentation. A remote provider is a provider that makes a remote request to an service. An inline provider is a provider where implementation is executed locally. Checkout the examples, and follow the structure to add your own API provider. Please find the following code pointers:
 
-- [Inference Remote Adapter](https://github.com/meta-llama/llama-stack/tree/docs/llama_stack/providers/adapters/inference)
+- [Inference Remote Adapter](https://github.com/meta-llama/llama-stack/tree/docs/llama_stack/providers/remote/inference)
-- [Inference Inline Provider](https://github.com/meta-llama/llama-stack/tree/docs/llama_stack/providers/impls/meta_reference/inference)
+- [Inference Inline Provider](https://github.com/meta-llama/llama-stack/tree/docs/llama_stack/providers/inline/meta_reference/inference)
 
 3. [Build a Llama Stack distribution](https://llama-stack.readthedocs.io/en/latest/distribution_dev/building_distro.html) with your API provider.
 4. Test your code!
@@ -3,7 +3,7 @@
 We offer both remote and on-device use of Llama Stack in Swift via two components:
 
 1. [llama-stack-client-swift](https://github.com/meta-llama/llama-stack-client-swift/)
-2. [LocalInferenceImpl](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/impls/ios/inference)
+2. [LocalInferenceImpl](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/inline/ios/inference)
 
 ```{image} ../../../../_static/remote_or_local.gif
 :alt: Seamlessly switching between local, on-device inference and remote hosted inference
@@ -102,7 +102,7 @@ ollama pull llama3.1:70b-instruct-fp16
 ```
 
 > [!NOTE]
-> Please check the [OLLAMA_SUPPORTED_MODELS](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/adapters/inference/ollama/ollama.py) for the supported Ollama models.
+> Please check the [OLLAMA_SUPPORTED_MODELS](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers.remote/inference/ollama/ollama.py) for the supported Ollama models.
 
 
 To serve a new model with `ollama`
@@ -386,7 +386,7 @@ ollama pull llama3.1:8b-instruct-fp16
 ollama pull llama3.1:70b-instruct-fp16
 ```
 
-> Please check the [OLLAMA_SUPPORTED_MODELS](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/adapters/inference/ollama/ollama.py) for the supported Ollama models.
+> Please check the [OLLAMA_SUPPORTED_MODELS](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers.remote/inference/ollama/ollama.py) for the supported Ollama models.
 
 
 To serve a new model with `ollama`
@@ -16,7 +16,7 @@ from llama_stack.apis.datasets import * # noqa: F403
 from autoevals.llm import Factuality
 from autoevals.ragas import AnswerCorrectness
 from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.common import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import (
     aggregate_average,
 )
@@ -9,7 +9,7 @@ from typing import List
 from llama_stack.apis.inference import Message
 from llama_stack.apis.safety import * # noqa: F403
 
-from llama_stack.providers.impls.meta_reference.agents.safety import ShieldRunnerMixin
+from llama_stack.providers.inline.meta_reference.agents.safety import ShieldRunnerMixin
 
 from .builtin import BaseTool
@@ -27,7 +27,7 @@ from torchao.quantization.GPTQ import Int8DynActInt4WeightLinear
 
 from llama_stack.apis.inference import QuantizationType
 
-from llama_stack.providers.impls.meta_reference.inference.config import (
+from llama_stack.providers.inline.meta_reference.inference.config import (
     MetaReferenceQuantizedInferenceConfig,
 )
@@ -8,9 +8,9 @@ import tempfile
 
 import pytest
 from llama_stack.apis.memory import MemoryBankType, VectorMemoryBankDef
-from llama_stack.providers.impls.meta_reference.memory.config import FaissImplConfig
+from llama_stack.providers.inline.meta_reference.memory.config import FaissImplConfig
 
-from llama_stack.providers.impls.meta_reference.memory.faiss import FaissMemoryImpl
+from llama_stack.providers.inline.meta_reference.memory.faiss import FaissMemoryImpl
 from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
 
@@ -13,15 +13,15 @@ from llama_stack.apis.datasetio import * # noqa: F403
 from llama_stack.apis.datasets import * # noqa: F403
 from llama_stack.apis.inference.inference import Inference
 from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.equality_scoring_fn import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.equality_scoring_fn import (
     EqualityScoringFn,
 )
 
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.llm_as_judge_scoring_fn import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.llm_as_judge_scoring_fn import (
     LlmAsJudgeScoringFn,
 )
 
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.subset_of_scoring_fn import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.subset_of_scoring_fn import (
     SubsetOfScoringFn,
 )
@@ -4,18 +4,18 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.base_scoring_fn import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.base_scoring_fn import (
     BaseScoringFn,
 )
 from llama_stack.apis.scoring_functions import * # noqa: F401, F403
 from llama_stack.apis.scoring import * # noqa: F401, F403
 from llama_stack.apis.common.type_system import * # noqa: F403
 
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.common import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import (
     aggregate_accuracy,
 )
 
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.fn_defs.equality import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.fn_defs.equality import (
     equality,
 )
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from llama_stack.apis.inference.inference import Inference
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.base_scoring_fn import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.base_scoring_fn import (
     BaseScoringFn,
 )
 from llama_stack.apis.scoring_functions import * # noqa: F401, F403
@@ -12,10 +12,10 @@ from llama_stack.apis.scoring import * # noqa: F401, F403
 from llama_stack.apis.common.type_system import * # noqa: F403
 import re
 
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.common import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import (
     aggregate_average,
 )
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.fn_defs.llm_as_judge_8b_correctness import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.fn_defs.llm_as_judge_8b_correctness import (
     llm_as_judge_8b_correctness,
 )
@@ -4,17 +4,17 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.base_scoring_fn import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.base_scoring_fn import (
     BaseScoringFn,
 )
 from llama_stack.apis.scoring_functions import * # noqa: F401, F403
 from llama_stack.apis.scoring import * # noqa: F401, F403
 from llama_stack.apis.common.type_system import * # noqa: F403
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.common import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import (
     aggregate_accuracy,
 )
 
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.fn_defs.subset_of import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.fn_defs.subset_of import (
     subset_of,
 )
@@ -22,8 +22,8 @@ def available_providers() -> List[ProviderSpec]:
                 "scikit-learn",
             ]
             + kvstore_dependencies(),
-            module="llama_stack.providers.impls.meta_reference.agents",
+            module="llama_stack.providers.inline.meta_reference.agents",
-            config_class="llama_stack.providers.impls.meta_reference.agents.MetaReferenceAgentsImplConfig",
+            config_class="llama_stack.providers.inline.meta_reference.agents.MetaReferenceAgentsImplConfig",
             api_dependencies=[
                 Api.inference,
                 Api.safety,
@@ -36,8 +36,8 @@ def available_providers() -> List[ProviderSpec]:
             adapter=AdapterSpec(
                 adapter_type="sample",
                 pip_packages=[],
-                module="llama_stack.providers.adapters.agents.sample",
+                module="llama_stack.providers.remote.agents.sample",
-                config_class="llama_stack.providers.adapters.agents.sample.SampleConfig",
+                config_class="llama_stack.providers.remote.agents.sample.SampleConfig",
             ),
         ),
     ]
@@ -15,8 +15,8 @@ def available_providers() -> List[ProviderSpec]:
             api=Api.datasetio,
             provider_type="meta-reference",
             pip_packages=["pandas"],
-            module="llama_stack.providers.impls.meta_reference.datasetio",
+            module="llama_stack.providers.inline.meta_reference.datasetio",
-            config_class="llama_stack.providers.impls.meta_reference.datasetio.MetaReferenceDatasetIOConfig",
+            config_class="llama_stack.providers.inline.meta_reference.datasetio.MetaReferenceDatasetIOConfig",
             api_dependencies=[],
         ),
     ]
@@ -15,8 +15,8 @@ def available_providers() -> List[ProviderSpec]:
             api=Api.eval,
             provider_type="meta-reference",
             pip_packages=[],
-            module="llama_stack.providers.impls.meta_reference.eval",
+            module="llama_stack.providers.inline.meta_reference.eval",
-            config_class="llama_stack.providers.impls.meta_reference.eval.MetaReferenceEvalConfig",
+            config_class="llama_stack.providers.inline.meta_reference.eval.MetaReferenceEvalConfig",
             api_dependencies=[
                 Api.datasetio,
                 Api.datasets,
@@ -27,8 +27,8 @@ def available_providers() -> List[ProviderSpec]:
             api=Api.inference,
             provider_type="meta-reference",
             pip_packages=META_REFERENCE_DEPS,
-            module="llama_stack.providers.impls.meta_reference.inference",
+            module="llama_stack.providers.inline.meta_reference.inference",
-            config_class="llama_stack.providers.impls.meta_reference.inference.MetaReferenceInferenceConfig",
+            config_class="llama_stack.providers.inline.meta_reference.inference.MetaReferenceInferenceConfig",
         ),
         InlineProviderSpec(
             api=Api.inference,
@@ -40,16 +40,16 @@ def available_providers() -> List[ProviderSpec]:
                     "torchao==0.5.0",
                 ]
             ),
-            module="llama_stack.providers.impls.meta_reference.inference",
+            module="llama_stack.providers.inline.meta_reference.inference",
-            config_class="llama_stack.providers.impls.meta_reference.inference.MetaReferenceQuantizedInferenceConfig",
+            config_class="llama_stack.providers.inline.meta_reference.inference.MetaReferenceQuantizedInferenceConfig",
         ),
         remote_provider_spec(
             api=Api.inference,
             adapter=AdapterSpec(
                 adapter_type="sample",
                 pip_packages=[],
-                module="llama_stack.providers.adapters.inference.sample",
+                module="llama_stack.providers.remote.inference.sample",
-                config_class="llama_stack.providers.adapters.inference.sample.SampleConfig",
+                config_class="llama_stack.providers.remote.inference.sample.SampleConfig",
             ),
         ),
         remote_provider_spec(
@@ -57,8 +57,8 @@ def available_providers() -> List[ProviderSpec]:
             adapter=AdapterSpec(
                 adapter_type="ollama",
                 pip_packages=["ollama", "aiohttp"],
-                config_class="llama_stack.providers.adapters.inference.ollama.OllamaImplConfig",
+                config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
-                module="llama_stack.providers.adapters.inference.ollama",
+                module="llama_stack.providers.remote.inference.ollama",
             ),
         ),
         remote_provider_spec(
@@ -66,8 +66,8 @@ def available_providers() -> List[ProviderSpec]:
             adapter=AdapterSpec(
                 adapter_type="vllm",
                 pip_packages=["openai"],
-                module="llama_stack.providers.adapters.inference.vllm",
+                module="llama_stack.providers.remote.inference.vllm",
-                config_class="llama_stack.providers.adapters.inference.vllm.VLLMInferenceAdapterConfig",
+                config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig",
             ),
         ),
         remote_provider_spec(
@@ -75,8 +75,8 @@ def available_providers() -> List[ProviderSpec]:
             adapter=AdapterSpec(
                 adapter_type="tgi",
                 pip_packages=["huggingface_hub", "aiohttp"],
-                module="llama_stack.providers.adapters.inference.tgi",
+                module="llama_stack.providers.remote.inference.tgi",
-                config_class="llama_stack.providers.adapters.inference.tgi.TGIImplConfig",
+                config_class="llama_stack.providers.remote.inference.tgi.TGIImplConfig",
             ),
         ),
         remote_provider_spec(
@@ -84,8 +84,8 @@ def available_providers() -> List[ProviderSpec]:
             adapter=AdapterSpec(
                 adapter_type="hf::serverless",
                 pip_packages=["huggingface_hub", "aiohttp"],
-                module="llama_stack.providers.adapters.inference.tgi",
+                module="llama_stack.providers.remote.inference.tgi",
-                config_class="llama_stack.providers.adapters.inference.tgi.InferenceAPIImplConfig",
+                config_class="llama_stack.providers.remote.inference.tgi.InferenceAPIImplConfig",
             ),
         ),
         remote_provider_spec(
@@ -93,8 +93,8 @@ def available_providers() -> List[ProviderSpec]:
             adapter=AdapterSpec(
                 adapter_type="hf::endpoint",
                 pip_packages=["huggingface_hub", "aiohttp"],
-                module="llama_stack.providers.adapters.inference.tgi",
+                module="llama_stack.providers.remote.inference.tgi",
-                config_class="llama_stack.providers.adapters.inference.tgi.InferenceEndpointImplConfig",
+                config_class="llama_stack.providers.remote.inference.tgi.InferenceEndpointImplConfig",
             ),
         ),
         remote_provider_spec(
@@ -104,8 +104,8 @@ def available_providers() -> List[ProviderSpec]:
                 pip_packages=[
                     "fireworks-ai",
                 ],
-                module="llama_stack.providers.adapters.inference.fireworks",
+                module="llama_stack.providers.remote.inference.fireworks",
-                config_class="llama_stack.providers.adapters.inference.fireworks.FireworksImplConfig",
+                config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig",
             ),
         ),
         remote_provider_spec(
@@ -115,9 +115,9 @@ def available_providers() -> List[ProviderSpec]:
                 pip_packages=[
                     "together",
                 ],
-                module="llama_stack.providers.adapters.inference.together",
+                module="llama_stack.providers.remote.inference.together",
-                config_class="llama_stack.providers.adapters.inference.together.TogetherImplConfig",
+                config_class="llama_stack.providers.remote.inference.together.TogetherImplConfig",
-                provider_data_validator="llama_stack.providers.adapters.safety.together.TogetherProviderDataValidator",
+                provider_data_validator="llama_stack.providers.remote.safety.together.TogetherProviderDataValidator",
             ),
         ),
         remote_provider_spec(
|
@ -125,8 +125,8 @@ def available_providers() -> List[ProviderSpec]:
|
||||||
adapter=AdapterSpec(
|
adapter=AdapterSpec(
|
||||||
adapter_type="bedrock",
|
adapter_type="bedrock",
|
||||||
pip_packages=["boto3"],
|
pip_packages=["boto3"],
|
||||||
module="llama_stack.providers.adapters.inference.bedrock",
|
module="llama_stack.providers.remote.inference.bedrock",
|
||||||
config_class="llama_stack.providers.adapters.inference.bedrock.BedrockConfig",
|
config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig",
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
remote_provider_spec(
|
remote_provider_spec(
|
||||||
|
@ -136,8 +136,8 @@ def available_providers() -> List[ProviderSpec]:
|
||||||
pip_packages=[
|
pip_packages=[
|
||||||
"openai",
|
"openai",
|
||||||
],
|
],
|
||||||
module="llama_stack.providers.adapters.inference.databricks",
|
module="llama_stack.providers.remote.inference.databricks",
|
||||||
config_class="llama_stack.providers.adapters.inference.databricks.DatabricksImplConfig",
|
config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
|
||||||
),
|
),
|
||||||
),
|
),
|
||||||
InlineProviderSpec(
|
InlineProviderSpec(
|
||||||
|
@ -146,7 +146,7 @@ def available_providers() -> List[ProviderSpec]:
|
||||||
pip_packages=[
|
pip_packages=[
|
||||||
"vllm",
|
"vllm",
|
||||||
],
|
],
|
||||||
module="llama_stack.providers.impls.vllm",
|
module="llama_stack.providers.inline.vllm",
|
||||||
config_class="llama_stack.providers.impls.vllm.VLLMConfig",
|
config_class="llama_stack.providers.inline.vllm.VLLMConfig",
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
Some files were not shown because too many files have changed in this diff.
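Downstream code that imports the old module paths needs the same substitution. The helper below is a hypothetical sketch, not part of this commit: it walks a project tree and rewrites the two prefixes exactly as the hunks above do.

```python
# Hypothetical migration helper (illustration only, not part of this commit).
from pathlib import Path

# Old prefix -> new prefix, mirroring the rename applied by this commit.
RENAMES = {
    "llama_stack.providers.impls": "llama_stack.providers.inline",
    "llama_stack.providers.adapters": "llama_stack.providers.remote",
    "llama_stack/providers/impls": "llama_stack/providers/inline",
    "llama_stack/providers/adapters": "llama_stack/providers/remote",
}


def migrate(root: str) -> None:
    """Rewrite old provider paths in every .py file under `root`, in place."""
    for path in Path(root).rglob("*.py"):
        text = path.read_text()
        updated = text
        for old, new in RENAMES.items():
            updated = updated.replace(old, new)
        if updated != text:
            path.write_text(updated)


if __name__ == "__main__":
    migrate(".")  # run from the root of the downstream project
```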