mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-27 18:50:41 +00:00
Some checks failed
Integration Auth Tests / test-matrix (oauth2_token) (push) Failing after 2s
Integration Tests / test-matrix (http, 3.12, inspect) (push) Failing after 7s
Integration Tests / test-matrix (http, 3.12, post_training) (push) Failing after 7s
Integration Tests / test-matrix (http, 3.13, agents) (push) Failing after 11s
Integration Tests / test-matrix (http, 3.12, vector_io) (push) Failing after 13s
Integration Tests / test-matrix (http, 3.13, providers) (push) Failing after 11s
Integration Tests / test-matrix (http, 3.12, agents) (push) Failing after 21s
Integration Tests / test-matrix (library, 3.13, agents) (push) Failing after 9s
Integration Tests / test-matrix (http, 3.13, inference) (push) Failing after 17s
Integration Tests / test-matrix (http, 3.13, inspect) (push) Failing after 16s
Integration Tests / test-matrix (library, 3.12, inference) (push) Failing after 14s
Integration Tests / test-matrix (library, 3.12, datasets) (push) Failing after 9s
Integration Tests / test-matrix (http, 3.13, datasets) (push) Failing after 18s
Integration Tests / test-matrix (http, 3.12, providers) (push) Failing after 19s
Integration Tests / test-matrix (library, 3.12, scoring) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.12, agents) (push) Failing after 18s
Integration Tests / test-matrix (library, 3.12, post_training) (push) Failing after 13s
Integration Tests / test-matrix (http, 3.13, tool_runtime) (push) Failing after 15s
Integration Tests / test-matrix (http, 3.13, post_training) (push) Failing after 15s
Integration Tests / test-matrix (library, 3.12, tool_runtime) (push) Failing after 12s
Integration Tests / test-matrix (http, 3.12, datasets) (push) Failing after 20s
Integration Tests / test-matrix (library, 3.13, post_training) (push) Failing after 10s
Integration Tests / test-matrix (library, 3.12, inspect) (push) Failing after 14s
Integration Tests / test-matrix (library, 3.13, datasets) (push) Failing after 7s
Test Llama Stack Build / generate-matrix (push) Successful in 7s
Integration Tests / test-matrix (http, 3.13, scoring) (push) Failing after 16s
Integration Tests / test-matrix (library, 3.12, providers) (push) Failing after 16s
Integration Tests / test-matrix (http, 3.12, tool_runtime) (push) Failing after 18s
Test Llama Stack Build / build-custom-container-distribution (push) Failing after 8s
Python Package Build Test / build (3.12) (push) Failing after 5s
Integration Tests / test-matrix (library, 3.12, vector_io) (push) Failing after 17s
Python Package Build Test / build (3.13) (push) Failing after 4s
Test Llama Stack Build / build-single-provider (push) Failing after 8s
Integration Tests / test-matrix (library, 3.13, inspect) (push) Failing after 8s
Integration Tests / test-matrix (library, 3.13, inference) (push) Failing after 11s
Integration Tests / test-matrix (http, 3.12, inference) (push) Failing after 26s
Integration Tests / test-matrix (http, 3.12, scoring) (push) Failing after 19s
Integration Tests / test-matrix (http, 3.13, vector_io) (push) Failing after 15s
Integration Tests / test-matrix (library, 3.13, scoring) (push) Failing after 11s
Integration Tests / test-matrix (library, 3.13, tool_runtime) (push) Failing after 8s
Test External Providers / test-external-providers (venv) (push) Failing after 8s
Integration Tests / test-matrix (library, 3.13, providers) (push) Failing after 10s
Test Llama Stack Build / build-ubi9-container-distribution (push) Failing after 12s
Unit Tests / unit-tests (3.12) (push) Failing after 7s
Integration Tests / test-matrix (library, 3.13, vector_io) (push) Failing after 10s
Unit Tests / unit-tests (3.13) (push) Failing after 6s
Update ReadTheDocs / update-readthedocs (push) Failing after 4s
Test Llama Stack Build / build (push) Failing after 7s
Pre-commit / pre-commit (push) Successful in 48s
# What does this PR do? The goal is to promote the minimal set of dependencies the project needs to run. This includes: * dependencies needed to work with the CLI * dependencies needed for the server to run with no providers This also: * Relocates redundant dependencies out of the core project and into the individual providers that actually require them. * Includes all necessary server dependencies so the project can run standalone, even without any providers. <!-- Provide a short summary of what this PR does and why. Link to relevant issues if applicable. --> <!-- If resolving an issue, uncomment and update the line below --> <!-- Closes #[issue-number] --> ## Test Plan Build and run a distro server. Signed-off-by: Sébastien Han <seb@redhat.com>
308 lines
13 KiB
Python
308 lines
13 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
|
|
|
|
|
|
from llama_stack.providers.datatypes import (
    AdapterSpec,
    Api,
    InlineProviderSpec,
    ProviderSpec,
    remote_provider_spec,
)
|
|
|
|
# Pip requirements declared by the ``inline::meta-reference`` inference
# provider (see ``available_providers``). ``torchao`` and ``fbgemm-gpu-genai``
# are pinned to exact versions; the remaining entries are unpinned.
META_REFERENCE_DEPS = [
    "accelerate",
    "fairscale",
    "torch",
    "torchvision",
    "transformers",
    "zmq",
    "lm-format-enforcer",
    "sentence-transformers",
    "torchao==0.8.0",
    "fbgemm-gpu-genai==1.1.2",
]
|
|
|
|
|
|
def available_providers() -> list[ProviderSpec]:
    """Return the provider specs declared for the inference API.

    The list holds the three inline providers first
    (``inline::meta-reference``, ``inline::vllm``,
    ``inline::sentence-transformers``), followed by one remote provider spec
    per adapter. Every spec is declared with ``api=Api.inference`` and names
    its ``pip_packages``, its implementation ``module`` and its
    ``config_class`` as strings; some remote adapters additionally name a
    ``provider_data_validator``.

    Returns:
        A newly built list of provider specs, in declaration order.
    """
    return [
        # --- Inline providers -------------------------------------------------
        InlineProviderSpec(
            api=Api.inference,
            provider_type="inline::meta-reference",
            pip_packages=META_REFERENCE_DEPS,
            module="llama_stack.providers.inline.inference.meta_reference",
            config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig",
        ),
        InlineProviderSpec(
            api=Api.inference,
            provider_type="inline::vllm",
            pip_packages=[
                "vllm",
            ],
            module="llama_stack.providers.inline.inference.vllm",
            config_class="llama_stack.providers.inline.inference.vllm.VLLMConfig",
        ),
        InlineProviderSpec(
            api=Api.inference,
            provider_type="inline::sentence-transformers",
            # Each entry bundles pip options in the same string (CPU wheel
            # index, --no-deps). NOTE(review): presumably forwarded verbatim
            # to pip by the build tooling -- confirm before reformatting.
            pip_packages=[
                "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
                "sentence-transformers --no-deps",
            ],
            module="llama_stack.providers.inline.inference.sentence_transformers",
            config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig",
        ),
        # --- Remote adapters with their own client packages -------------------
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="cerebras",
                pip_packages=[
                    "cerebras_cloud_sdk",
                ],
                module="llama_stack.providers.remote.inference.cerebras",
                config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="ollama",
                pip_packages=["ollama", "aiohttp", "h11>=0.16.0"],
                config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
                module="llama_stack.providers.remote.inference.ollama",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="vllm",
                pip_packages=["openai"],
                module="llama_stack.providers.remote.inference.vllm",
                config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig",
            ),
        ),
        # The next three adapters share the ``tgi`` module and pip packages;
        # they differ only in adapter type and config class.
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="tgi",
                pip_packages=["huggingface_hub", "aiohttp"],
                module="llama_stack.providers.remote.inference.tgi",
                config_class="llama_stack.providers.remote.inference.tgi.TGIImplConfig",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="hf::serverless",
                pip_packages=["huggingface_hub", "aiohttp"],
                module="llama_stack.providers.remote.inference.tgi",
                config_class="llama_stack.providers.remote.inference.tgi.InferenceAPIImplConfig",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="hf::endpoint",
                pip_packages=["huggingface_hub", "aiohttp"],
                module="llama_stack.providers.remote.inference.tgi",
                config_class="llama_stack.providers.remote.inference.tgi.InferenceEndpointImplConfig",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="fireworks",
                pip_packages=[
                    "fireworks-ai",
                ],
                module="llama_stack.providers.remote.inference.fireworks",
                config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig",
                provider_data_validator="llama_stack.providers.remote.inference.fireworks.FireworksProviderDataValidator",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="together",
                pip_packages=[
                    "together",
                ],
                module="llama_stack.providers.remote.inference.together",
                config_class="llama_stack.providers.remote.inference.together.TogetherImplConfig",
                provider_data_validator="llama_stack.providers.remote.inference.together.TogetherProviderDataValidator",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="bedrock",
                pip_packages=["boto3"],
                module="llama_stack.providers.remote.inference.bedrock",
                config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="databricks",
                pip_packages=[
                    "openai",
                ],
                module="llama_stack.providers.remote.inference.databricks",
                config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="nvidia",
                pip_packages=[
                    "openai",
                ],
                module="llama_stack.providers.remote.inference.nvidia",
                config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="runpod",
                pip_packages=["openai"],
                module="llama_stack.providers.remote.inference.runpod",
                config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig",
            ),
        ),
        # --- Remote adapters whose only declared pip package is litellm -------
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="openai",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.openai",
                config_class="llama_stack.providers.remote.inference.openai.OpenAIConfig",
                provider_data_validator="llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="anthropic",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.anthropic",
                config_class="llama_stack.providers.remote.inference.anthropic.AnthropicConfig",
                provider_data_validator="llama_stack.providers.remote.inference.anthropic.config.AnthropicProviderDataValidator",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="gemini",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.gemini",
                config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig",
                provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="groq",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.groq",
                config_class="llama_stack.providers.remote.inference.groq.GroqConfig",
                provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="fireworks-openai-compat",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.fireworks_openai_compat",
                config_class="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksCompatConfig",
                provider_data_validator="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksProviderDataValidator",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="llama-openai-compat",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.llama_openai_compat",
                config_class="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaCompatConfig",
                provider_data_validator="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="together-openai-compat",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.together_openai_compat",
                config_class="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherCompatConfig",
                provider_data_validator="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherProviderDataValidator",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="groq-openai-compat",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.groq_openai_compat",
                config_class="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqCompatConfig",
                provider_data_validator="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqProviderDataValidator",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="sambanova-openai-compat",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.sambanova_openai_compat",
                config_class="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaCompatConfig",
                provider_data_validator="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaProviderDataValidator",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="cerebras-openai-compat",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.cerebras_openai_compat",
                config_class="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasCompatConfig",
                provider_data_validator="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasProviderDataValidator",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="sambanova",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.sambanova",
                config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig",
                provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator",
            ),
        ),
        # --- Remaining remote adapters ----------------------------------------
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="passthrough",
                # Declares no extra pip packages.
                pip_packages=[],
                module="llama_stack.providers.remote.inference.passthrough",
                config_class="llama_stack.providers.remote.inference.passthrough.PassthroughImplConfig",
                provider_data_validator="llama_stack.providers.remote.inference.passthrough.PassthroughProviderDataValidator",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="watsonx",
                pip_packages=["ibm_watson_machine_learning"],
                module="llama_stack.providers.remote.inference.watsonx",
                config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig",
                provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator",
            ),
        ),
    ]
|