mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-19 14:19:40 +00:00
Merge branch 'main' into clarifai-inference-provider
This commit is contained in:
commit
4b9085d312
536 changed files with 34661 additions and 12116 deletions
|
|
@ -9,40 +9,74 @@ from typing import List
|
|||
from llama_stack.distribution.datatypes import * # noqa: F403
|
||||
|
||||
|
||||
META_REFERENCE_DEPS = [
|
||||
"accelerate",
|
||||
"blobfile",
|
||||
"fairscale",
|
||||
"torch",
|
||||
"torchvision",
|
||||
"transformers",
|
||||
"zmq",
|
||||
"lm-format-enforcer",
|
||||
]
|
||||
|
||||
|
||||
def available_providers() -> List[ProviderSpec]:
|
||||
return [
|
||||
InlineProviderSpec(
|
||||
api=Api.inference,
|
||||
provider_type="meta-reference",
|
||||
provider_type="inline::meta-reference",
|
||||
pip_packages=META_REFERENCE_DEPS,
|
||||
module="llama_stack.providers.inline.inference.meta_reference",
|
||||
config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig",
|
||||
),
|
||||
InlineProviderSpec(
|
||||
api=Api.inference,
|
||||
provider_type="inline::meta-reference-quantized",
|
||||
pip_packages=(
|
||||
META_REFERENCE_DEPS
|
||||
+ [
|
||||
"fbgemm-gpu",
|
||||
"torchao==0.5.0",
|
||||
]
|
||||
),
|
||||
module="llama_stack.providers.inline.inference.meta_reference",
|
||||
config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceQuantizedInferenceConfig",
|
||||
),
|
||||
InlineProviderSpec(
|
||||
api=Api.inference,
|
||||
provider_type="inline::vllm",
|
||||
pip_packages=[
|
||||
"accelerate",
|
||||
"blobfile",
|
||||
"fairscale",
|
||||
"fbgemm-gpu==0.8.0",
|
||||
"torch",
|
||||
"torchvision",
|
||||
"transformers",
|
||||
"zmq",
|
||||
"vllm",
|
||||
],
|
||||
module="llama_stack.providers.impls.meta_reference.inference",
|
||||
config_class="llama_stack.providers.impls.meta_reference.inference.MetaReferenceImplConfig",
|
||||
module="llama_stack.providers.inline.inference.vllm",
|
||||
config_class="llama_stack.providers.inline.inference.vllm.VLLMConfig",
|
||||
),
|
||||
remote_provider_spec(
|
||||
api=Api.inference,
|
||||
adapter=AdapterSpec(
|
||||
adapter_type="sample",
|
||||
pip_packages=[],
|
||||
module="llama_stack.providers.adapters.inference.sample",
|
||||
config_class="llama_stack.providers.adapters.inference.sample.SampleConfig",
|
||||
module="llama_stack.providers.remote.inference.sample",
|
||||
config_class="llama_stack.providers.remote.inference.sample.SampleConfig",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
api=Api.inference,
|
||||
adapter=AdapterSpec(
|
||||
adapter_type="ollama",
|
||||
pip_packages=["ollama"],
|
||||
config_class="llama_stack.providers.adapters.inference.ollama.OllamaImplConfig",
|
||||
module="llama_stack.providers.adapters.inference.ollama",
|
||||
pip_packages=["ollama", "aiohttp"],
|
||||
config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
|
||||
module="llama_stack.providers.remote.inference.ollama",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
api=Api.inference,
|
||||
adapter=AdapterSpec(
|
||||
adapter_type="vllm",
|
||||
pip_packages=["openai"],
|
||||
module="llama_stack.providers.remote.inference.vllm",
|
||||
config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -50,8 +84,8 @@ def available_providers() -> List[ProviderSpec]:
|
|||
adapter=AdapterSpec(
|
||||
adapter_type="tgi",
|
||||
pip_packages=["huggingface_hub", "aiohttp"],
|
||||
module="llama_stack.providers.adapters.inference.tgi",
|
||||
config_class="llama_stack.providers.adapters.inference.tgi.TGIImplConfig",
|
||||
module="llama_stack.providers.remote.inference.tgi",
|
||||
config_class="llama_stack.providers.remote.inference.tgi.TGIImplConfig",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -59,8 +93,8 @@ def available_providers() -> List[ProviderSpec]:
|
|||
adapter=AdapterSpec(
|
||||
adapter_type="hf::serverless",
|
||||
pip_packages=["huggingface_hub", "aiohttp"],
|
||||
module="llama_stack.providers.adapters.inference.tgi",
|
||||
config_class="llama_stack.providers.adapters.inference.tgi.InferenceAPIImplConfig",
|
||||
module="llama_stack.providers.remote.inference.tgi",
|
||||
config_class="llama_stack.providers.remote.inference.tgi.InferenceAPIImplConfig",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -68,8 +102,8 @@ def available_providers() -> List[ProviderSpec]:
|
|||
adapter=AdapterSpec(
|
||||
adapter_type="hf::endpoint",
|
||||
pip_packages=["huggingface_hub", "aiohttp"],
|
||||
module="llama_stack.providers.adapters.inference.tgi",
|
||||
config_class="llama_stack.providers.adapters.inference.tgi.InferenceEndpointImplConfig",
|
||||
module="llama_stack.providers.remote.inference.tgi",
|
||||
config_class="llama_stack.providers.remote.inference.tgi.InferenceEndpointImplConfig",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -79,8 +113,9 @@ def available_providers() -> List[ProviderSpec]:
|
|||
pip_packages=[
|
||||
"fireworks-ai",
|
||||
],
|
||||
module="llama_stack.providers.adapters.inference.fireworks",
|
||||
config_class="llama_stack.providers.adapters.inference.fireworks.FireworksImplConfig",
|
||||
module="llama_stack.providers.remote.inference.fireworks",
|
||||
config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.fireworks.FireworksProviderDataValidator",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -90,9 +125,9 @@ def available_providers() -> List[ProviderSpec]:
|
|||
pip_packages=[
|
||||
"together",
|
||||
],
|
||||
module="llama_stack.providers.adapters.inference.together",
|
||||
config_class="llama_stack.providers.adapters.inference.together.TogetherImplConfig",
|
||||
provider_data_validator="llama_stack.providers.adapters.safety.together.TogetherProviderDataValidator",
|
||||
module="llama_stack.providers.remote.inference.together",
|
||||
config_class="llama_stack.providers.remote.inference.together.TogetherImplConfig",
|
||||
provider_data_validator="llama_stack.providers.remote.inference.together.TogetherProviderDataValidator",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -100,8 +135,8 @@ def available_providers() -> List[ProviderSpec]:
|
|||
adapter=AdapterSpec(
|
||||
adapter_type="bedrock",
|
||||
pip_packages=["boto3"],
|
||||
module="llama_stack.providers.adapters.inference.bedrock",
|
||||
config_class="llama_stack.providers.adapters.inference.bedrock.BedrockConfig",
|
||||
module="llama_stack.providers.remote.inference.bedrock",
|
||||
config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -111,8 +146,8 @@ def available_providers() -> List[ProviderSpec]:
|
|||
pip_packages=[
|
||||
"openai",
|
||||
],
|
||||
module="llama_stack.providers.adapters.inference.databricks",
|
||||
config_class="llama_stack.providers.adapters.inference.databricks.DatabricksImplConfig",
|
||||
module="llama_stack.providers.remote.inference.databricks",
|
||||
config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
|
||||
),
|
||||
),
|
||||
remote_provider_spec(
|
||||
|
|
@ -126,13 +161,15 @@ def available_providers() -> List[ProviderSpec]:
|
|||
config_class="llama_stack.providers.adapters.inference.clarifai.ClarifaiImplConfig",
|
||||
),
|
||||
),
|
||||
InlineProviderSpec(
|
||||
remote_provider_spec(
|
||||
api=Api.inference,
|
||||
provider_type="vllm",
|
||||
pip_packages=[
|
||||
"vllm",
|
||||
],
|
||||
module="llama_stack.providers.impls.vllm",
|
||||
config_class="llama_stack.providers.impls.vllm.VLLMConfig",
|
||||
adapter=AdapterSpec(
|
||||
adapter_type="nvidia",
|
||||
pip_packages=[
|
||||
"openai",
|
||||
],
|
||||
module="llama_stack.providers.remote.inference.nvidia",
|
||||
config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig",
|
||||
),
|
||||
),
|
||||
]
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue