# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.providers.datatypes import (
    AdapterSpec,
    Api,
    InlineProviderSpec,
    ProviderSpec,
    remote_provider_spec,
)

META_REFERENCE_DEPS = [
    "accelerate",
    "fairscale",
    "torch",
    "torchvision",
    "transformers",
    "zmq",
    "lm-format-enforcer",
    "sentence-transformers",
    "torchao==0.8.0",
    "fbgemm-gpu-genai==1.1.2",
]


def available_providers() -> list[ProviderSpec]:
    return [
        InlineProviderSpec(
            api=Api.inference,
            provider_type="inline::meta-reference",
            pip_packages=META_REFERENCE_DEPS,
            module="llama_stack.providers.inline.inference.meta_reference",
            config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig",
            description="Meta's reference implementation of inference with support for various model formats and optimization techniques.",
        ),
        InlineProviderSpec(
            api=Api.inference,
            provider_type="inline::vllm",
            pip_packages=[
                "vllm",
            ],
            module="llama_stack.providers.inline.inference.vllm",
            config_class="llama_stack.providers.inline.inference.vllm.VLLMConfig",
            description="vLLM inference provider for high-performance model serving with PagedAttention and continuous batching.",
        ),
        InlineProviderSpec(
            api=Api.inference,
            provider_type="inline::sentence-transformers",
            pip_packages=[
                "torch torchvision --index-url https://download.pytorch.org/whl/cpu",
                "sentence-transformers --no-deps",
            ],
            module="llama_stack.providers.inline.inference.sentence_transformers",
            config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig",
            description="Sentence Transformers inference provider for text embeddings and similarity search.",
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="cerebras",
                pip_packages=[
                    "cerebras_cloud_sdk",
                ],
                module="llama_stack.providers.remote.inference.cerebras",
                config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig",
                description="Cerebras inference provider for running models on Cerebras Cloud platform.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="ollama",
                pip_packages=["ollama", "aiohttp", "h11>=0.16.0"],
                config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
                module="llama_stack.providers.remote.inference.ollama",
                description="Ollama inference provider for running local models through the Ollama runtime.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="vllm",
                pip_packages=["openai"],
                module="llama_stack.providers.remote.inference.vllm",
                config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig",
                description="Remote vLLM inference provider for connecting to vLLM servers.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="tgi",
                pip_packages=["huggingface_hub", "aiohttp"],
                module="llama_stack.providers.remote.inference.tgi",
                config_class="llama_stack.providers.remote.inference.tgi.TGIImplConfig",
                description="Text Generation Inference (TGI) provider for HuggingFace model serving.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="hf::serverless",
                pip_packages=["huggingface_hub", "aiohttp"],
                module="llama_stack.providers.remote.inference.tgi",
                config_class="llama_stack.providers.remote.inference.tgi.InferenceAPIImplConfig",
                description="HuggingFace Inference API serverless provider for on-demand model inference.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="hf::endpoint",
                pip_packages=["huggingface_hub", "aiohttp"],
                module="llama_stack.providers.remote.inference.tgi",
                config_class="llama_stack.providers.remote.inference.tgi.InferenceEndpointImplConfig",
                description="HuggingFace Inference Endpoints provider for dedicated model serving.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="fireworks",
                pip_packages=[
                    "fireworks-ai",
                ],
                module="llama_stack.providers.remote.inference.fireworks",
                config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig",
                provider_data_validator="llama_stack.providers.remote.inference.fireworks.FireworksProviderDataValidator",
                description="Fireworks AI inference provider for Llama models and other AI models on the Fireworks platform.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="together",
                pip_packages=[
                    "together",
                ],
                module="llama_stack.providers.remote.inference.together",
                config_class="llama_stack.providers.remote.inference.together.TogetherImplConfig",
                provider_data_validator="llama_stack.providers.remote.inference.together.TogetherProviderDataValidator",
                description="Together AI inference provider for open-source models and collaborative AI development.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="bedrock",
                pip_packages=["boto3"],
                module="llama_stack.providers.remote.inference.bedrock",
                config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig",
                description="AWS Bedrock inference provider for accessing various AI models through AWS's managed service.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="databricks",
                pip_packages=[
                    "openai",
                ],
                module="llama_stack.providers.remote.inference.databricks",
                config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
                description="Databricks inference provider for running models on Databricks' unified analytics platform.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="nvidia",
                pip_packages=[
                    "openai",
                ],
                module="llama_stack.providers.remote.inference.nvidia",
                config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig",
                description="NVIDIA inference provider for accessing NVIDIA NIM models and AI services.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="runpod",
                pip_packages=["openai"],
                module="llama_stack.providers.remote.inference.runpod",
                config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig",
                description="RunPod inference provider for running models on RunPod's cloud GPU platform.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="openai",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.openai",
                config_class="llama_stack.providers.remote.inference.openai.OpenAIConfig",
                provider_data_validator="llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator",
                description="OpenAI inference provider for accessing GPT models and other OpenAI services.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="anthropic",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.anthropic",
                config_class="llama_stack.providers.remote.inference.anthropic.AnthropicConfig",
                provider_data_validator="llama_stack.providers.remote.inference.anthropic.config.AnthropicProviderDataValidator",
                description="Anthropic inference provider for accessing Claude models and Anthropic's AI services.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="gemini",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.gemini",
                config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig",
                provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator",
                description="Google Gemini inference provider for accessing Gemini models and Google's AI services.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="groq",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.groq",
                config_class="llama_stack.providers.remote.inference.groq.GroqConfig",
                provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator",
                description="Groq inference provider for ultra-fast inference using Groq's LPU technology.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="fireworks-openai-compat",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.fireworks_openai_compat",
                config_class="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksCompatConfig",
                provider_data_validator="llama_stack.providers.remote.inference.fireworks_openai_compat.config.FireworksProviderDataValidator",
                description="Fireworks AI OpenAI-compatible provider for using Fireworks models with OpenAI API format.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="llama-openai-compat",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.llama_openai_compat",
                config_class="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaCompatConfig",
                provider_data_validator="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator",
                description="Llama OpenAI-compatible provider for using Llama models with OpenAI API format.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="together-openai-compat",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.together_openai_compat",
                config_class="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherCompatConfig",
                provider_data_validator="llama_stack.providers.remote.inference.together_openai_compat.config.TogetherProviderDataValidator",
                description="Together AI OpenAI-compatible provider for using Together models with OpenAI API format.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="groq-openai-compat",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.groq_openai_compat",
                config_class="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqCompatConfig",
                provider_data_validator="llama_stack.providers.remote.inference.groq_openai_compat.config.GroqProviderDataValidator",
                description="Groq OpenAI-compatible provider for using Groq models with OpenAI API format.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="sambanova-openai-compat",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.sambanova_openai_compat",
                config_class="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaCompatConfig",
                provider_data_validator="llama_stack.providers.remote.inference.sambanova_openai_compat.config.SambaNovaProviderDataValidator",
                description="SambaNova OpenAI-compatible provider for using SambaNova models with OpenAI API format.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="cerebras-openai-compat",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.cerebras_openai_compat",
                config_class="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasCompatConfig",
                provider_data_validator="llama_stack.providers.remote.inference.cerebras_openai_compat.config.CerebrasProviderDataValidator",
                description="Cerebras OpenAI-compatible provider for using Cerebras models with OpenAI API format.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="sambanova",
                pip_packages=["litellm"],
                module="llama_stack.providers.remote.inference.sambanova",
                config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig",
                provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator",
                description="SambaNova inference provider for running models on SambaNova's dataflow architecture.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="passthrough",
                pip_packages=[],
                module="llama_stack.providers.remote.inference.passthrough",
                config_class="llama_stack.providers.remote.inference.passthrough.PassthroughImplConfig",
                provider_data_validator="llama_stack.providers.remote.inference.passthrough.PassthroughProviderDataValidator",
                description="Passthrough inference provider for connecting to any external inference service not directly supported.",
            ),
        ),
        remote_provider_spec(
            api=Api.inference,
            adapter=AdapterSpec(
                adapter_type="watsonx",
                pip_packages=["ibm_watson_machine_learning"],
                module="llama_stack.providers.remote.inference.watsonx",
                config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig",
                provider_data_validator="llama_stack.providers.remote.inference.watsonx.WatsonXProviderDataValidator",
                description="IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.",
            ),
        ),
    ]