impls -> inline, adapters -> remote (#381)

Author: Ashwin Bharambe, 2024-11-06 14:54:05 -08:00 (committed by GitHub)
Commit: 994732e2e0
Parent: b10e9f46bb
GPG key ID: B5690EEEBB952194 (no known key found for this signature in database)
169 changed files with 106 additions and 105 deletions
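As far as this diff shows, the change is a package rename under `llama_stack/providers/`: inline implementations move from `impls` to `inline`, and remote adapters move from `adapters` to `remote`, with call sites updating their dotted paths to match. A minimal before/after sketch, using an import that appears in one of the hunks below (illustrative only):

```python
# Before this commit (old layout under providers/impls/...):
# from llama_stack.providers.impls.meta_reference.inference.config import (
#     MetaReferenceQuantizedInferenceConfig,
# )

# After this commit (inline implementations live under providers/inline/...):
from llama_stack.providers.inline.meta_reference.inference.config import (
    MetaReferenceQuantizedInferenceConfig,
)
```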

.gitmodules (vendored): 2 changes
@@ -1,3 +1,3 @@
 [submodule "llama_stack/providers/impls/ios/inference/executorch"]
-path = llama_stack/providers/impls/ios/inference/executorch
+path = llama_stack/providers/inline/ios/inference/executorch
 url = https://github.com/pytorch/executorch

@@ -6,8 +6,8 @@ This guide contains references to walk you through adding a new API provider.
 1. First, decide which API your provider falls into (e.g. Inference, Safety, Agents, Memory).
 2. Decide whether your provider is a remote provider or an inline implementation. A remote provider makes a remote request to a service; an inline provider's implementation is executed locally. Check out the examples and follow the structure to add your own API provider (see the sketch after this hunk). Please find the following code pointers:
-- [Inference Remote Adapter](https://github.com/meta-llama/llama-stack/tree/docs/llama_stack/providers/adapters/inference)
-- [Inference Inline Provider](https://github.com/meta-llama/llama-stack/tree/docs/llama_stack/providers/impls/meta_reference/inference)
+- [Inference Remote Adapter](https://github.com/meta-llama/llama-stack/tree/docs/llama_stack/providers/remote/inference)
+- [Inference Inline Provider](https://github.com/meta-llama/llama-stack/tree/docs/llama_stack/providers/inline/meta_reference/inference)
 3. [Build a Llama Stack distribution](https://llama-stack.readthedocs.io/en/latest/distribution_dev/building_distro.html) with your API provider.
 4. Test your code!
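As a rough sketch of step 2 under the new layout, the two provider shapes mirror the registry entries changed later in this diff. The `my_provider` names and module paths below are placeholders, and the import location of the spec types is an assumption:

```python
from llama_stack.providers.datatypes import (  # assumed home of these spec types
    AdapterSpec,
    Api,
    InlineProviderSpec,
    remote_provider_spec,
)

# Inline provider: runs in-process, code lives under llama_stack/providers/inline/...
inline_example = InlineProviderSpec(
    api=Api.inference,
    provider_type="my-inline-provider",  # hypothetical
    pip_packages=[],
    module="llama_stack.providers.inline.my_provider.inference",  # hypothetical
    config_class="llama_stack.providers.inline.my_provider.inference.MyProviderConfig",
)

# Remote provider: a thin adapter that calls out to a service, under llama_stack/providers/remote/...
remote_example = remote_provider_spec(
    api=Api.inference,
    adapter=AdapterSpec(
        adapter_type="my-remote-service",  # hypothetical
        pip_packages=["aiohttp"],
        module="llama_stack.providers.remote.inference.my_provider",  # hypothetical
        config_class="llama_stack.providers.remote.inference.my_provider.MyProviderConfig",
    ),
)
```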

@@ -3,7 +3,7 @@
 We offer both remote and on-device use of Llama Stack in Swift via two components:
 1. [llama-stack-client-swift](https://github.com/meta-llama/llama-stack-client-swift/)
-2. [LocalInferenceImpl](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/impls/ios/inference)
+2. [LocalInferenceImpl](https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/inline/ios/inference)
 ```{image} ../../../../_static/remote_or_local.gif
 :alt: Seamlessly switching between local, on-device inference and remote hosted inference

@@ -102,7 +102,7 @@ ollama pull llama3.1:70b-instruct-fp16
 ```
 > [!NOTE]
-> Please check the [OLLAMA_SUPPORTED_MODELS](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/adapters/inference/ollama/ollama.py) for the supported Ollama models.
+> Please check the [OLLAMA_SUPPORTED_MODELS](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py) for the supported Ollama models.
 To serve a new model with `ollama`
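If you want to list that mapping programmatically, one option is sketched below. The import path follows the new layout from this commit; the exact shape of `OLLAMA_SUPPORTED_MODELS` (assumed here to be a dict of Llama Stack model names to Ollama tags) is an assumption:

```python
# Illustrative only; assumes OLLAMA_SUPPORTED_MODELS is a dict mapping
# Llama Stack model identifiers to Ollama model tags.
from llama_stack.providers.remote.inference.ollama.ollama import (
    OLLAMA_SUPPORTED_MODELS,
)

for stack_name, ollama_tag in sorted(OLLAMA_SUPPORTED_MODELS.items()):
    print(f"{stack_name} -> {ollama_tag}")
```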

@@ -386,7 +386,7 @@ ollama pull llama3.1:8b-instruct-fp16
 ollama pull llama3.1:70b-instruct-fp16
 ```
-> Please check the [OLLAMA_SUPPORTED_MODELS](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/adapters/inference/ollama/ollama.py) for the supported Ollama models.
+> Please check the [OLLAMA_SUPPORTED_MODELS](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/inference/ollama/ollama.py) for the supported Ollama models.
 To serve a new model with `ollama`

@@ -16,7 +16,7 @@ from llama_stack.apis.datasets import * # noqa: F403
 from autoevals.llm import Factuality
 from autoevals.ragas import AnswerCorrectness
 from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.common import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import (
     aggregate_average,
 )

@@ -9,7 +9,7 @@ from typing import List
 from llama_stack.apis.inference import Message
 from llama_stack.apis.safety import * # noqa: F403
-from llama_stack.providers.impls.meta_reference.agents.safety import ShieldRunnerMixin
+from llama_stack.providers.inline.meta_reference.agents.safety import ShieldRunnerMixin
 from .builtin import BaseTool

@@ -27,7 +27,7 @@ from torchao.quantization.GPTQ import Int8DynActInt4WeightLinear
 from llama_stack.apis.inference import QuantizationType
-from llama_stack.providers.impls.meta_reference.inference.config import (
+from llama_stack.providers.inline.meta_reference.inference.config import (
     MetaReferenceQuantizedInferenceConfig,
 )

@@ -8,9 +8,9 @@ import tempfile
 import pytest
 from llama_stack.apis.memory import MemoryBankType, VectorMemoryBankDef
-from llama_stack.providers.impls.meta_reference.memory.config import FaissImplConfig
-from llama_stack.providers.impls.meta_reference.memory.faiss import FaissMemoryImpl
+from llama_stack.providers.inline.meta_reference.memory.config import FaissImplConfig
+from llama_stack.providers.inline.meta_reference.memory.faiss import FaissMemoryImpl
 from llama_stack.providers.utils.kvstore.config import SqliteKVStoreConfig

@@ -13,15 +13,15 @@ from llama_stack.apis.datasetio import * # noqa: F403
 from llama_stack.apis.datasets import * # noqa: F403
 from llama_stack.apis.inference.inference import Inference
 from llama_stack.providers.datatypes import ScoringFunctionsProtocolPrivate
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.equality_scoring_fn import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.equality_scoring_fn import (
     EqualityScoringFn,
 )
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.llm_as_judge_scoring_fn import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.llm_as_judge_scoring_fn import (
     LlmAsJudgeScoringFn,
 )
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.subset_of_scoring_fn import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.subset_of_scoring_fn import (
     SubsetOfScoringFn,
 )

@@ -4,18 +4,18 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.base_scoring_fn import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.base_scoring_fn import (
     BaseScoringFn,
 )
 from llama_stack.apis.scoring_functions import * # noqa: F401, F403
 from llama_stack.apis.scoring import * # noqa: F401, F403
 from llama_stack.apis.common.type_system import * # noqa: F403
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.common import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import (
     aggregate_accuracy,
 )
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.fn_defs.equality import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.fn_defs.equality import (
     equality,
 )

@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 from llama_stack.apis.inference.inference import Inference
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.base_scoring_fn import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.base_scoring_fn import (
     BaseScoringFn,
 )
 from llama_stack.apis.scoring_functions import * # noqa: F401, F403
@@ -12,10 +12,10 @@ from llama_stack.apis.scoring import * # noqa: F401, F403
 from llama_stack.apis.common.type_system import * # noqa: F403
 import re
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.common import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import (
     aggregate_average,
 )
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.fn_defs.llm_as_judge_8b_correctness import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.fn_defs.llm_as_judge_8b_correctness import (
     llm_as_judge_8b_correctness,
 )

@@ -4,17 +4,17 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.base_scoring_fn import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.base_scoring_fn import (
     BaseScoringFn,
 )
 from llama_stack.apis.scoring_functions import * # noqa: F401, F403
 from llama_stack.apis.scoring import * # noqa: F401, F403
 from llama_stack.apis.common.type_system import * # noqa: F403
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.common import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.common import (
     aggregate_accuracy,
 )
-from llama_stack.providers.impls.meta_reference.scoring.scoring_fn.fn_defs.subset_of import (
+from llama_stack.providers.inline.meta_reference.scoring.scoring_fn.fn_defs.subset_of import (
     subset_of,
 )

@@ -22,8 +22,8 @@ def available_providers() -> List[ProviderSpec]:
                 "scikit-learn",
             ]
             + kvstore_dependencies(),
-            module="llama_stack.providers.impls.meta_reference.agents",
-            config_class="llama_stack.providers.impls.meta_reference.agents.MetaReferenceAgentsImplConfig",
+            module="llama_stack.providers.inline.meta_reference.agents",
+            config_class="llama_stack.providers.inline.meta_reference.agents.MetaReferenceAgentsImplConfig",
             api_dependencies=[
                 Api.inference,
                 Api.safety,
@@ -36,8 +36,8 @@ def available_providers() -> List[ProviderSpec]:
             adapter=AdapterSpec(
                 adapter_type="sample",
                 pip_packages=[],
-                module="llama_stack.providers.adapters.agents.sample",
-                config_class="llama_stack.providers.adapters.agents.sample.SampleConfig",
+                module="llama_stack.providers.remote.agents.sample",
+                config_class="llama_stack.providers.remote.agents.sample.SampleConfig",
             ),
         ),
     ]

@@ -15,8 +15,8 @@ def available_providers() -> List[ProviderSpec]:
             api=Api.datasetio,
             provider_type="meta-reference",
             pip_packages=["pandas"],
-            module="llama_stack.providers.impls.meta_reference.datasetio",
-            config_class="llama_stack.providers.impls.meta_reference.datasetio.MetaReferenceDatasetIOConfig",
+            module="llama_stack.providers.inline.meta_reference.datasetio",
+            config_class="llama_stack.providers.inline.meta_reference.datasetio.MetaReferenceDatasetIOConfig",
             api_dependencies=[],
         ),
     ]

@@ -15,8 +15,8 @@ def available_providers() -> List[ProviderSpec]:
             api=Api.eval,
             provider_type="meta-reference",
             pip_packages=[],
-            module="llama_stack.providers.impls.meta_reference.eval",
-            config_class="llama_stack.providers.impls.meta_reference.eval.MetaReferenceEvalConfig",
+            module="llama_stack.providers.inline.meta_reference.eval",
+            config_class="llama_stack.providers.inline.meta_reference.eval.MetaReferenceEvalConfig",
             api_dependencies=[
                 Api.datasetio,
                 Api.datasets,

@@ -27,8 +27,8 @@ def available_providers() -> List[ProviderSpec]:
             api=Api.inference,
             provider_type="meta-reference",
             pip_packages=META_REFERENCE_DEPS,
-            module="llama_stack.providers.impls.meta_reference.inference",
-            config_class="llama_stack.providers.impls.meta_reference.inference.MetaReferenceInferenceConfig",
+            module="llama_stack.providers.inline.meta_reference.inference",
+            config_class="llama_stack.providers.inline.meta_reference.inference.MetaReferenceInferenceConfig",
         ),
         InlineProviderSpec(
             api=Api.inference,
@@ -40,16 +40,16 @@ def available_providers() -> List[ProviderSpec]:
                     "torchao==0.5.0",
                 ]
             ),
-            module="llama_stack.providers.impls.meta_reference.inference",
-            config_class="llama_stack.providers.impls.meta_reference.inference.MetaReferenceQuantizedInferenceConfig",
+            module="llama_stack.providers.inline.meta_reference.inference",
+            config_class="llama_stack.providers.inline.meta_reference.inference.MetaReferenceQuantizedInferenceConfig",
         ),
         remote_provider_spec(
             api=Api.inference,
             adapter=AdapterSpec(
                 adapter_type="sample",
                 pip_packages=[],
-                module="llama_stack.providers.adapters.inference.sample",
-                config_class="llama_stack.providers.adapters.inference.sample.SampleConfig",
+                module="llama_stack.providers.remote.inference.sample",
+                config_class="llama_stack.providers.remote.inference.sample.SampleConfig",
            ),
        ),
        remote_provider_spec(
@@ -57,8 +57,8 @@ def available_providers() -> List[ProviderSpec]:
             adapter=AdapterSpec(
                 adapter_type="ollama",
                 pip_packages=["ollama", "aiohttp"],
-                config_class="llama_stack.providers.adapters.inference.ollama.OllamaImplConfig",
-                module="llama_stack.providers.adapters.inference.ollama",
+                config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
+                module="llama_stack.providers.remote.inference.ollama",
             ),
         ),
         remote_provider_spec(
@@ -66,8 +66,8 @@ def available_providers() -> List[ProviderSpec]:
             adapter=AdapterSpec(
                 adapter_type="vllm",
                 pip_packages=["openai"],
-                module="llama_stack.providers.adapters.inference.vllm",
-                config_class="llama_stack.providers.adapters.inference.vllm.VLLMInferenceAdapterConfig",
+                module="llama_stack.providers.remote.inference.vllm",
+                config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig",
             ),
         ),
         remote_provider_spec(
@@ -75,8 +75,8 @@ def available_providers() -> List[ProviderSpec]:
             adapter=AdapterSpec(
                 adapter_type="tgi",
                 pip_packages=["huggingface_hub", "aiohttp"],
-                module="llama_stack.providers.adapters.inference.tgi",
-                config_class="llama_stack.providers.adapters.inference.tgi.TGIImplConfig",
+                module="llama_stack.providers.remote.inference.tgi",
+                config_class="llama_stack.providers.remote.inference.tgi.TGIImplConfig",
             ),
         ),
         remote_provider_spec(
@@ -84,8 +84,8 @@ def available_providers() -> List[ProviderSpec]:
             adapter=AdapterSpec(
                 adapter_type="hf::serverless",
                 pip_packages=["huggingface_hub", "aiohttp"],
-                module="llama_stack.providers.adapters.inference.tgi",
-                config_class="llama_stack.providers.adapters.inference.tgi.InferenceAPIImplConfig",
+                module="llama_stack.providers.remote.inference.tgi",
+                config_class="llama_stack.providers.remote.inference.tgi.InferenceAPIImplConfig",
             ),
         ),
         remote_provider_spec(
@@ -93,8 +93,8 @@ def available_providers() -> List[ProviderSpec]:
             adapter=AdapterSpec(
                 adapter_type="hf::endpoint",
                 pip_packages=["huggingface_hub", "aiohttp"],
-                module="llama_stack.providers.adapters.inference.tgi",
-                config_class="llama_stack.providers.adapters.inference.tgi.InferenceEndpointImplConfig",
+                module="llama_stack.providers.remote.inference.tgi",
+                config_class="llama_stack.providers.remote.inference.tgi.InferenceEndpointImplConfig",
             ),
         ),
         remote_provider_spec(
@@ -104,8 +104,8 @@ def available_providers() -> List[ProviderSpec]:
                 pip_packages=[
                     "fireworks-ai",
                 ],
-                module="llama_stack.providers.adapters.inference.fireworks",
-                config_class="llama_stack.providers.adapters.inference.fireworks.FireworksImplConfig",
+                module="llama_stack.providers.remote.inference.fireworks",
+                config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig",
             ),
         ),
         remote_provider_spec(
@@ -115,9 +115,9 @@ def available_providers() -> List[ProviderSpec]:
                 pip_packages=[
                     "together",
                 ],
-                module="llama_stack.providers.adapters.inference.together",
-                config_class="llama_stack.providers.adapters.inference.together.TogetherImplConfig",
-                provider_data_validator="llama_stack.providers.adapters.safety.together.TogetherProviderDataValidator",
+                module="llama_stack.providers.remote.inference.together",
+                config_class="llama_stack.providers.remote.inference.together.TogetherImplConfig",
+                provider_data_validator="llama_stack.providers.remote.safety.together.TogetherProviderDataValidator",
             ),
         ),
         remote_provider_spec(
@@ -125,8 +125,8 @@ def available_providers() -> List[ProviderSpec]:
             adapter=AdapterSpec(
                 adapter_type="bedrock",
                 pip_packages=["boto3"],
-                module="llama_stack.providers.adapters.inference.bedrock",
-                config_class="llama_stack.providers.adapters.inference.bedrock.BedrockConfig",
+                module="llama_stack.providers.remote.inference.bedrock",
+                config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig",
             ),
         ),
         remote_provider_spec(
@@ -136,8 +136,8 @@ def available_providers() -> List[ProviderSpec]:
                 pip_packages=[
                     "openai",
                 ],
-                module="llama_stack.providers.adapters.inference.databricks",
-                config_class="llama_stack.providers.adapters.inference.databricks.DatabricksImplConfig",
+                module="llama_stack.providers.remote.inference.databricks",
+                config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
             ),
         ),
         InlineProviderSpec(
@@ -146,7 +146,7 @@ def available_providers() -> List[ProviderSpec]:
             pip_packages=[
                 "vllm",
             ],
-            module="llama_stack.providers.impls.vllm",
-            config_class="llama_stack.providers.impls.vllm.VLLMConfig",
+            module="llama_stack.providers.inline.vllm",
+            config_class="llama_stack.providers.inline.vllm.VLLMConfig",
         ),
     ]
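The registry entries above carry only dotted module strings, which is why every `module` and `config_class` value in this file has to track the `impls` -> `inline` and `adapters` -> `remote` move. A generic sketch of how such strings can be resolved at runtime (standard `importlib` usage; Llama Stack's actual loader may differ in names and details):

```python
import importlib


def resolve_provider(module_path: str, config_class_path: str):
    """Turn the dotted strings from a provider spec into a module and a config class."""
    module = importlib.import_module(module_path)
    cfg_module, cfg_name = config_class_path.rsplit(".", 1)
    config_class = getattr(importlib.import_module(cfg_module), cfg_name)
    return module, config_class


# Example using the renamed Ollama adapter entry from the hunk above
# (requires llama_stack and the adapter's dependencies to be installed):
module, config_class = resolve_provider(
    "llama_stack.providers.remote.inference.ollama",
    "llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
)
```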

Some files were not shown because too many files have changed in this diff.