forked from phoenix-oss/llama-stack-mirror
impls
-> inline
, adapters
-> remote
(#381)
This commit is contained in:
parent
b10e9f46bb
commit
994732e2e0
169 changed files with 106 additions and 105 deletions
2
.gitmodules
vendored
2
.gitmodules
vendored
|
@ -1,3 +1,3 @@
|
|||
[submodule "llama_stack/providers/impls/ios/inference/executorch"]
|
||||
path = llama_stack/providers/impls/ios/inference/executorch
|
||||
path = llama_stack/providers/inline/ios/inference/executorch
|
||||
url = https://github.com/pytorch/executorch
|
||||
|
|
|
@ -6,8 +6,8 @@ This guide contains references to walk you through adding a new API provider.
|
|||
1. First, decide which API your provider falls into (e.g. Inference, Safety, Agents, Memory).
|
||||
2. Decide whether your provider is a remote provider or an inline implementation. A remote provider is a provider that makes a remote request to a service. An inline provider is a provider whose implementation is executed locally. Check out the examples, and follow the structure to add your own API provider. Please find the following code pointers:
|
||||
|
||||
- [Inference Remote Adapter](https://github.com/meta-llama/llama-stack/tree/docs/llama_stack/providers/adapters/inference)
|
||||
- [Inference Inline Provider](https://github.com/meta-llama/llama-stack/tree/docs/llama_stack/providers/impls/meta_reference/inference)
|
||||
- [Inference Remote Adapter](https://github.com/meta-llama/llama-stack/tree/docs/llama_stack/providers/remote/inference)
|
||||
- [Inference Inline Provider](https://github.com/meta-llama/llama-stack/tree/docs/llama_stack/providers/inline/meta_reference/inference)
|
||||
|
||||
3. [Build a Llama Stack distribution](https://llama-stack.readthedocs.io/en/latest/distribution_dev/building_distro.html) with your API provider.
|
||||
4. Test your code!
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
We offer both remote and on-device use of Llama Stack in Swift via two components:
|
||||
|
||||
1. [llama-stack-client-swift](https://github.com/meta-llama/llama-stack-client-swift/)
|
||||
2. [LocalInferenceImpl](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/impls/ios/inference)
|
||||
2. [LocalInferenceImpl](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/inline/ios/inference)
|
||||
|
||||
```{image} ../../../../_static/remote_or_local.gif
|
||||
:alt: Seamlessly switching between local, on-device inference and remote hosted inference
|
||||
|
|
|
@ -102,7 +102,7 @@ ollama pull llama3.1:70b-instruct-fp16
|
|||
```
|
||||
|
||||
> [!NOTE]
|
||||
> Please check the [OLLAMA_SUPPORTED_MODELS](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/adapters/inference/ollama/ollama.py) for the supported Ollama models.
|
||||
> Please check the [OLLAMA_SUPPORTED_MODELS](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py) for the supported Ollama models.
|
||||
|
||||
|
||||
To serve a new model with `ollama`
|
||||
|
|
|
@ -386,7 +386,7 @@ ollama pull llama3.1:8b-instruct-fp16
|
|||
ollama pull llama3.1:70b-instruct-fp16
|
||||
```
|
||||
|
||||
> Please check the [OLLAMA_SUPPORTED_MODELS](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/adapters/inference/ollama/ollama.py) for the supported Ollama models.
|
||||
> Please check the [OLLAMA_SUPPORTED_MODELS](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py) for the supported Ollama models.
|
||||
|
||||
|
||||
To serve a new model with `ollama`
|
||||
|
|
|
@ -16,7 +16,7 @@ from llama_stack.apis.datasets import * # noqa: F403
|
|||
from autoevals.llm import Factuality
|
||||
from autoevals.ragas import AnswerCorrectness
|
||||
from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
|
||||
from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.common import (
|
||||
from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import (
|
||||
aggregate_average,
|
||||
)
|
||||
|
|
@ -9,7 +9,7 @@ from typing import List
|
|||
from llama_stack.apis.inference import Message
|
||||
from llama_stack.apis.safety import * # noqa: F403
|
||||
|
||||
from llama_stack.providers.impls.meta_reference.agents.safety import ShieldRunnerMixin
|
||||
from llama_stack.providers.inline.meta_reference.agents.safety import ShieldRunnerMixin
|
||||
|
||||
from .builtin import BaseTool
|
||||
|
|
@ -27,7 +27,7 @@ from torchao.quantization.GPTQ import Int8DynActInt4WeightLinear
|
|||
|
||||
from llama_stack.apis.inference import QuantizationType
|
||||
|
||||
from llama_stack.providers.impls.meta_reference.inference.config import (
|
||||
from llama_stack.providers.inline.meta_reference.inference.config import (
|
||||
MetaReferenceQuantizedInferenceConfig,
|
||||
)
|
||||
|
|
@ -8,9 +8,9 @@ import tempfile
|
|||
|
||||
import pytest
|
||||
from llama_stack.apis.memory import MemoryBankType, VectorMemoryBankDef
|
||||
from llama_stack.providers.impls.meta_reference.memory.config import FaissImplConfig
|
||||
from llama_stack.providers.inline.meta_reference.memory.config import FaissImplConfig
|
||||
|
||||
from llama_stack.providers.impls.meta_reference.memory.faiss import FaissMemoryImpl
|
||||
from llama_stack.providers.inline.meta_reference.memory.faiss import FaissMemoryImpl
|
||||
from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig
|
||||
|
||||
|
|
@ -13,15 +13,15 @@ from llama_stack.apis.datasetio import * # noqa: F403
|
|||
from llama_stack.apis.datasets import * # noqa: F403
|
||||
from llama_stack.apis.inference.inference import Inference
|
||||
from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
|
||||
from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.equality_scoring_fn import (
|
||||
from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.equality_scoring_fn import (
|
||||
EqualityScoringFn,
|
||||
)
|
||||
|
||||
from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.llm_as_judge_scoring_fn import (
|
||||
from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.llm_as_judge_scoring_fn import (
|
||||
LlmAsJudgeScoringFn,
|
||||
)
|
||||
|
||||
from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.subset_of_scoring_fn import (
|
||||
from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.subset_of_scoring_fn import (
|
||||
SubsetOfScoringFn,
|
||||
)
|
||||
|
|
@ -4,18 +4,18 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.base_scoring_fn import (
|
||||
from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.base_scoring_fn import (
|
||||
BaseScoringFn,
|
||||
)
|
||||
from llama_stack.apis.scoring_functions import * # noqa: F401, F403
|
||||
from llama_stack.apis.scoring import * # noqa: F401, F403
|
||||
from llama_stack.apis.common.type_system import * # noqa: F403
|
||||
|
||||
from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.common import (
|
||||
from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import (
|
||||
aggregate_accuracy,
|
||||
)
|
||||
|
||||
from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.fn_defs.equality import (
|
||||
from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.fn_defs.equality import (
|
||||
equality,
|
||||
)
|
||||
|
|
@ -4,7 +4,7 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
from llama_stack.apis.inference.inference import Inference
|
||||
from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.base_scoring_fn import (
|
||||
from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.base_scoring_fn import (
|
||||
BaseScoringFn,
|
||||
)
|
||||
from llama_stack.apis.scoring_functions import * # noqa: F401, F403
|
||||
|
@ -12,10 +12,10 @@ from llama_stack.apis.scoring import * # noqa: F401, F403
|
|||
from llama_stack.apis.common.type_system import * # noqa: F403
|
||||
import re
|
||||
|
||||
from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.common import (
|
||||
from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import (
|
||||
aggregate_average,
|
||||
)
|
||||
from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.fn_defs.llm_as_judge_8b_correctness import (
|
||||
from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.fn_defs.llm_as_judge_8b_correctness import (
|
||||
llm_as_judge_8b_correctness,
|
||||
)
|
||||
|
|
@ -4,17 +4,17 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.base_scoring_fn import (
|
||||
from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.base_scoring_fn import (
|
||||
BaseScoringFn,
|
||||
)
|
||||
from llama_stack.apis.scoring_functions import * # noqa: F401, F403
|
||||
from llama_stack.apis.scoring import * # noqa: F401, F403
|
||||
from llama_stack.apis.common.type_system import * # noqa: F403
|
||||
from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.common import (
|
||||
from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import (
|
||||
aggregate_accuracy,
|
||||
)
|
||||
|
||||
from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.fn_defs.subset_of import (
|
||||
from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.fn_defs.subset_of import (
|
||||
subset_of,
|
||||
)
|
||||
|
|
@ -22,8 +22,8 @@ def available_providers() -> List[ProviderSpec]:
|
|||
"scikit-learn",
|
||||
]
|
||||
+ kvstore_dependencies(),
|
||||
module="llama_stack.providers.impls.meta_reference.agents",
|
||||
config_class="llama_stack.providers.impls.meta_reference.agents.MetaReferenceAgentsImplConfig",
|
||||
module="llama_stack.providers.inline.meta_reference.agents",
|
||||
config_class="llama_stack.providers.inline.meta_reference.agents.MetaReferenceAgentsImplConfig",
|
||||
api_dependencies=[
|
||||
Api.inference,
|
||||
Api.safety,
|
||||
|
@ -36,8 +36,8 @@ def available_providers() -> List[ProviderSpec]:
|
|||
adapter=AdapterSpec(
|
||||
adapter_type="sample",
|
||||
pip_packages=[],
|
||||
module="llama_stack.providers.adapters.agents.sample",
|
||||
config_class="llama_stack.providers.adapters.agents.sample.SampleConfig",
|
||||
module="llama_stack.providers.remote.agents.sample",
|
||||
config_class="llama_stack.providers.remote.agents.sample.SampleConfig",
|
||||
),
|
||||
),
|
||||
]
|
||||
|
|
|
@ -15,8 +15,8 @@ def available_providers() -> List[ProviderSpec]:
|
|||
api=Api.datasetio,
|
||||
provider_type="meta-reference",
|
||||
pip_packages=["pandas"],
|
||||
module="llama_stack.providers.impls.meta_reference.datasetio",
|
||||
config_class="llama_stack.providers.impls.meta_reference.datasetio.MetaReferenceDatasetIOConfig",
|
||||
module="llama_stack.providers.inline.meta_reference.datasetio",
|
||||
config_class="llama_stack.providers.inline.meta_reference.datasetio.MetaReferenceDatasetIOConfig",
|
||||
api_dependencies=[],
|
||||
),
|
||||
]
|
||||
|
|
|
@ -15,8 +15,8 @@ def available_providers() -> List[ProviderSpec]:
|
|||
api=Api.eval,
|
||||
provider_type="meta-reference",
|
||||
pip_packages=[],
|
||||
module="llama_stack.providers.impls.meta_reference.eval",
|
||||
config_class="llama_stack.providers.impls.meta_reference.eval.MetaReferenceEvalConfig",
|
||||
module="llama_stack.providers.inline.meta_reference.eval",
|
||||
config_class="llama_stack.providers.inline.meta_reference.eval.MetaReferenceEvalConfig",
|
||||
api_dependencies=[
|
||||
Api.datasetio,
|
||||
Api.datasets,
|
||||
|
|
|
@ -27,8 +27,8 @@ def available_providers() -> List[ProviderSpec]:
|
|||
api=Api.inference,
|
||||
provider_type="meta-reference",
|
||||
pip_packages=META_REFERENCE_DEPS,
|
||||
module="llama_stack.providers.impls.meta_reference.inference",
|
||||
config_class="llama_stack.providers.impls.meta_reference.inference.MetaReferenceInferenceConfig",
|
||||
module="llama_stack.providers.inline.meta_reference.inference",
|
||||
config_class="llama_stack.providers.inline.meta_reference.inference.MetaReferenceInferenceConfig",
|
||||
),
|
||||
InlineProviderSpec(
|
||||
api=Api.inference,
|
||||
|
@ -40,16 +40,16 @@ def available_providers() -> List[ProviderSpec]:
|
|||
"torchao==0.5.0",
|
||||
]
|
||||
),
|
||||
module="llama_stack.providers.impls.meta_reference.inference",
|
||||
config_class="llama_stack.providers.impls.meta_reference.inference.MetaReferenceQuantizedInferenceConfig",
|
||||
module="llama_stack.providers.inline.meta_reference.inference",
|
||||
config_class="llama_stack.providers.inline.meta_reference.inference.MetaReferenceQuantizedInferenceConfig",
|
||||
),
|
||||
remote_provider_spec(
|
||||
api=Api.inference,
|
||||
adapter=AdapterSpec(
|
||||
adapter_type="sample",
|
||||
pip_packages=[],
|
||||
module="llama_stack.providers.adapters.inference.sample",
|
||||
config_class="llama_stack.providers.adapters.inference.sample.SampleConfig",
|
||||
module="llama_stack.providers.remote.inference.sample",
|
||||
config_class="llama_stack.providers.remote.inference.sample.SampleConfig",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
@ -57,8 +57,8 @@ def available_providers() -> List[ProviderSpec]:
|
|||
adapter=AdapterSpec(
|
||||
adapter_type="ollama",
|
||||
pip_packages=["ollama", "aiohttp"],
|
||||
config_class="llama_stack.providers.adapters.inference.ollama.OllamaImplConfig",
|
||||
module="llama_stack.providers.adapters.inference.ollama",
|
||||
config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
|
||||
module="llama_stack.providers.remote.inference.ollama",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
@ -66,8 +66,8 @@ def available_providers() -> List[ProviderSpec]:
|
|||
adapter=AdapterSpec(
|
||||
adapter_type="vllm",
|
||||
pip_packages=["openai"],
|
||||
module="llama_stack.providers.adapters.inference.vllm",
|
||||
config_class="llama_stack.providers.adapters.inference.vllm.VLLMInferenceAdapterConfig",
|
||||
module="llama_stack.providers.remote.inference.vllm",
|
||||
config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
@ -75,8 +75,8 @@ def available_providers() -> List[ProviderSpec]:
|
|||
adapter=AdapterSpec(
|
||||
adapter_type="tgi",
|
||||
pip_packages=["huggingface_hub", "aiohttp"],
|
||||
module="llama_stack.providers.adapters.inference.tgi",
|
||||
config_class="llama_stack.providers.adapters.inference.tgi.TGIImplConfig",
|
||||
module="llama_stack.providers.remote.inference.tgi",
|
||||
config_class="llama_stack.providers.remote.inference.tgi.TGIImplConfig",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
@ -84,8 +84,8 @@ def available_providers() -> List[ProviderSpec]:
|
|||
adapter=AdapterSpec(
|
||||
adapter_type="hf::serverless",
|
||||
pip_packages=["huggingface_hub", "aiohttp"],
|
||||
module="llama_stack.providers.adapters.inference.tgi",
|
||||
config_class="llama_stack.providers.adapters.inference.tgi.InferenceAPIImplConfig",
|
||||
module="llama_stack.providers.remote.inference.tgi",
|
||||
config_class="llama_stack.providers.remote.inference.tgi.InferenceAPIImplConfig",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
@ -93,8 +93,8 @@ def available_providers() -> List[ProviderSpec]:
|
|||
adapter=AdapterSpec(
|
||||
adapter_type="hf::endpoint",
|
||||
pip_packages=["huggingface_hub", "aiohttp"],
|
||||
module="llama_stack.providers.adapters.inference.tgi",
|
||||
config_class="llama_stack.providers.adapters.inference.tgi.InferenceEndpointImplConfig",
|
||||
module="llama_stack.providers.remote.inference.tgi",
|
||||
config_class="llama_stack.providers.remote.inference.tgi.InferenceEndpointImplConfig",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
@ -104,8 +104,8 @@ def available_providers() -> List[ProviderSpec]:
|
|||
pip_packages=[
|
||||
"fireworks-ai",
|
||||
],
|
||||
module="llama_stack.providers.adapters.inference.fireworks",
|
||||
config_class="llama_stack.providers.adapters.inference.fireworks.FireworksImplConfig",
|
||||
module="llama_stack.providers.remote.inference.fireworks",
|
||||
config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
@ -115,9 +115,9 @@ def available_providers() -> List[ProviderSpec]:
|
|||
pip_packages=[
|
||||
"together",
|
||||
],
|
||||
module="llama_stack.providers.adapters.inference.together",
|
||||
config_class="llama_stack.providers.adapters.inference.together.TogetherImplConfig",
|
||||
provider_data_validator="llama_stack.providers.adapters.safety.together.TogetherProviderDataValidator",
|
||||
module="llama_stack.providers.remote.inference.together",
|
||||
config_class="llama_stack.providers.remote.inference.together.TogetherImplConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.safety.together.TogetherProviderDataValidator",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
@ -125,8 +125,8 @@ def available_providers() -> List[ProviderSpec]:
|
|||
adapter=AdapterSpec(
|
||||
adapter_type="bedrock",
|
||||
pip_packages=["boto3"],
|
||||
module="llama_stack.providers.adapters.inference.bedrock",
|
||||
config_class="llama_stack.providers.adapters.inference.bedrock.BedrockConfig",
|
||||
module="llama_stack.providers.remote.inference.bedrock",
|
||||
config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
@ -136,8 +136,8 @@ def available_providers() -> List[ProviderSpec]:
|
|||
pip_packages=[
|
||||
"openai",
|
||||
],
|
||||
module="llama_stack.providers.adapters.inference.databricks",
|
||||
config_class="llama_stack.providers.adapters.inference.databricks.DatabricksImplConfig",
|
||||
module="llama_stack.providers.remote.inference.databricks",
|
||||
config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
|
||||
),
|
||||
),
|
||||
InlineProviderSpec(
|
||||
|
@ -146,7 +146,7 @@ def available_providers() -> List[ProviderSpec]:
|
|||
pip_packages=[
|
||||
"vllm",
|
||||
],
|
||||
module="llama_stack.providers.impls.vllm",
|
||||
config_class="llama_stack.providers.impls.vllm.VLLMConfig",
|
||||
module="llama_stack.providers.inline.vllm",
|
||||
config_class="llama_stack.providers.inline.vllm.VLLMConfig",
|
||||
),
|
||||
]
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue