Mirror of https://github.com/meta-llama/llama-stack.git — synced 2025-10-15 22:47:59 +00:00
# What does this PR do?

This PR fixes issues with the WatsonX provider so that it works correctly with LiteLLM. The main problem was that WatsonX requests failed because the provider data validator did not properly handle the API key and project ID. This is fixed by updating the `WatsonXProviderDataValidator` and ensuring the provider data is loaded correctly. The `openai_chat_completion` method was also updated to match the behavior of other providers while adding WatsonX-specific fields such as `project_id`. It still calls `await super().openai_chat_completion.__func__(self, params)` to keep the existing setup and tracing logic. After these changes, WatsonX requests run correctly.

## Test Plan

The changes were tested by running chat completion requests and confirming that credentials and project parameters are passed correctly. I tested with my WatsonX credentials using the CLI: `uv run llama-stack-client inference chat-completion --session`

---------

Signed-off-by: Sébastien Han <seb@redhat.com>
Co-authored-by: Sébastien Han <seb@redhat.com>
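For illustration, here is a minimal sketch of the pattern the description above refers to: a pydantic validator carrying per-request credentials, and an adapter method that injects the WatsonX-specific field before delegating through `__func__` so the parent's setup and tracing logic still runs. The field names, base class, and parameter types are assumptions made for this sketch, not the actual provider source.

```python
from typing import Any

from pydantic import BaseModel, Field


class WatsonXProviderDataValidator(BaseModel):
    """Per-request provider data; field names are assumed for illustration."""

    watsonx_api_key: str | None = Field(default=None)
    watsonx_project_id: str | None = Field(default=None)


class _LiteLLMBaseStub:
    """Stand-in for the shared LiteLLM-based base class used by the real adapter."""

    async def openai_chat_completion(self, params: dict[str, Any]) -> dict[str, Any]:
        # The real base class performs request setup, tracing, and the LiteLLM call.
        return {"params_seen": params}


class WatsonXAdapterSketch(_LiteLLMBaseStub):
    def __init__(self, project_id: str) -> None:
        self.project_id = project_id

    async def openai_chat_completion(self, params: dict[str, Any]) -> dict[str, Any]:
        # Add the WatsonX-specific field, then delegate via __func__ so the
        # parent's existing setup and tracing logic runs unchanged.
        params = {**params, "project_id": self.project_id}
        return await super().openai_chat_completion.__func__(self, params)
```

Running the sketch's `openai_chat_completion` returns the parameter dict with `project_id` injected, which mirrors what the fix ensures for real WatsonX requests.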
291 lines · 14 KiB · Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

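# Registry of inference providers: each spec below declares the provider type,
# the pip packages it needs, the module that implements it, its config class,
# and (for remote providers that accept per-request credentials) a provider
# data validator.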
from llama_stack.providers.datatypes import (
    Api,
    InlineProviderSpec,
    ProviderSpec,
    RemoteProviderSpec,
)

META_REFERENCE_DEPS = [
    "accelerate",
    "fairscale",
    "torch",
    "torchvision",
    "transformers",
    "zmq",
    "lm-format-enforcer",
    "sentence-transformers",
    "torchao==0.8.0",
    "fbgemm-gpu-genai==1.1.2",
]


def available_providers() -> list[ProviderSpec]:
    return [
        InlineProviderSpec(
            api=Api.inference,
            provider_type="inline::meta-reference",
            pip_packages=META_REFERENCE_DEPS,
            module="llama_stack.providers.inline.inference.meta_reference",
            config_class="llama_stack.providers.inline.inference.meta_reference.MetaReferenceInferenceConfig",
            description="Meta's reference implementation of inference with support for various model formats and optimization techniques.",
        ),
        InlineProviderSpec(
            api=Api.inference,
            provider_type="inline::sentence-transformers",
            # CrossEncoder depends on torchao.quantization
            pip_packages=[
                "torch torchvision torchao>=0.12.0 --extra-index-url https://download.pytorch.org/whl/cpu",
                "sentence-transformers --no-deps",
            ],
            module="llama_stack.providers.inline.inference.sentence_transformers",
            config_class="llama_stack.providers.inline.inference.sentence_transformers.config.SentenceTransformersInferenceConfig",
            description="Sentence Transformers inference provider for text embeddings and similarity search.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="cerebras",
            provider_type="remote::cerebras",
            pip_packages=[],
            module="llama_stack.providers.remote.inference.cerebras",
            config_class="llama_stack.providers.remote.inference.cerebras.CerebrasImplConfig",
            description="Cerebras inference provider for running models on Cerebras Cloud platform.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="ollama",
            provider_type="remote::ollama",
            pip_packages=["ollama", "aiohttp", "h11>=0.16.0"],
            config_class="llama_stack.providers.remote.inference.ollama.OllamaImplConfig",
            module="llama_stack.providers.remote.inference.ollama",
            description="Ollama inference provider for running local models through the Ollama runtime.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="vllm",
            provider_type="remote::vllm",
            pip_packages=[],
            module="llama_stack.providers.remote.inference.vllm",
            config_class="llama_stack.providers.remote.inference.vllm.VLLMInferenceAdapterConfig",
            provider_data_validator="llama_stack.providers.remote.inference.vllm.VLLMProviderDataValidator",
            description="Remote vLLM inference provider for connecting to vLLM servers.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="tgi",
            provider_type="remote::tgi",
            pip_packages=["huggingface_hub", "aiohttp"],
            module="llama_stack.providers.remote.inference.tgi",
            config_class="llama_stack.providers.remote.inference.tgi.TGIImplConfig",
            description="Text Generation Inference (TGI) provider for HuggingFace model serving.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="hf::serverless",
            provider_type="remote::hf::serverless",
            pip_packages=["huggingface_hub", "aiohttp"],
            module="llama_stack.providers.remote.inference.tgi",
            config_class="llama_stack.providers.remote.inference.tgi.InferenceAPIImplConfig",
            description="HuggingFace Inference API serverless provider for on-demand model inference.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            provider_type="remote::hf::endpoint",
            adapter_type="hf::endpoint",
            pip_packages=["huggingface_hub", "aiohttp"],
            module="llama_stack.providers.remote.inference.tgi",
            config_class="llama_stack.providers.remote.inference.tgi.InferenceEndpointImplConfig",
            description="HuggingFace Inference Endpoints provider for dedicated model serving.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="fireworks",
            provider_type="remote::fireworks",
            pip_packages=[
                "fireworks-ai<=0.17.16",
            ],
            module="llama_stack.providers.remote.inference.fireworks",
            config_class="llama_stack.providers.remote.inference.fireworks.FireworksImplConfig",
            provider_data_validator="llama_stack.providers.remote.inference.fireworks.FireworksProviderDataValidator",
            description="Fireworks AI inference provider for Llama models and other AI models on the Fireworks platform.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="together",
            provider_type="remote::together",
            pip_packages=[
                "together",
            ],
            module="llama_stack.providers.remote.inference.together",
            config_class="llama_stack.providers.remote.inference.together.TogetherImplConfig",
            provider_data_validator="llama_stack.providers.remote.inference.together.TogetherProviderDataValidator",
            description="Together AI inference provider for open-source models and collaborative AI development.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="bedrock",
            provider_type="remote::bedrock",
            pip_packages=["boto3"],
            module="llama_stack.providers.remote.inference.bedrock",
            config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig",
            description="AWS Bedrock inference provider for accessing various AI models through AWS's managed service.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="databricks",
            provider_type="remote::databricks",
            pip_packages=["databricks-sdk"],
            module="llama_stack.providers.remote.inference.databricks",
            config_class="llama_stack.providers.remote.inference.databricks.DatabricksImplConfig",
            description="Databricks inference provider for running models on Databricks' unified analytics platform.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="nvidia",
            provider_type="remote::nvidia",
            pip_packages=[],
            module="llama_stack.providers.remote.inference.nvidia",
            config_class="llama_stack.providers.remote.inference.nvidia.NVIDIAConfig",
            description="NVIDIA inference provider for accessing NVIDIA NIM models and AI services.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="runpod",
            provider_type="remote::runpod",
            pip_packages=[],
            module="llama_stack.providers.remote.inference.runpod",
            config_class="llama_stack.providers.remote.inference.runpod.RunpodImplConfig",
            description="RunPod inference provider for running models on RunPod's cloud GPU platform.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="openai",
            provider_type="remote::openai",
            pip_packages=[],
            module="llama_stack.providers.remote.inference.openai",
            config_class="llama_stack.providers.remote.inference.openai.OpenAIConfig",
            provider_data_validator="llama_stack.providers.remote.inference.openai.config.OpenAIProviderDataValidator",
            description="OpenAI inference provider for accessing GPT models and other OpenAI services.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="anthropic",
            provider_type="remote::anthropic",
            pip_packages=["anthropic"],
            module="llama_stack.providers.remote.inference.anthropic",
            config_class="llama_stack.providers.remote.inference.anthropic.AnthropicConfig",
            provider_data_validator="llama_stack.providers.remote.inference.anthropic.config.AnthropicProviderDataValidator",
            description="Anthropic inference provider for accessing Claude models and Anthropic's AI services.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="gemini",
            provider_type="remote::gemini",
            pip_packages=[],
            module="llama_stack.providers.remote.inference.gemini",
            config_class="llama_stack.providers.remote.inference.gemini.GeminiConfig",
            provider_data_validator="llama_stack.providers.remote.inference.gemini.config.GeminiProviderDataValidator",
            description="Google Gemini inference provider for accessing Gemini models and Google's AI services.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="vertexai",
            provider_type="remote::vertexai",
            pip_packages=[
                "google-cloud-aiplatform",
            ],
            module="llama_stack.providers.remote.inference.vertexai",
            config_class="llama_stack.providers.remote.inference.vertexai.VertexAIConfig",
            provider_data_validator="llama_stack.providers.remote.inference.vertexai.config.VertexAIProviderDataValidator",
            description="""Google Vertex AI inference provider enables you to use Google's Gemini models through Google Cloud's Vertex AI platform, providing several advantages:

• Enterprise-grade security: Uses Google Cloud's security controls and IAM
• Better integration: Seamless integration with other Google Cloud services
• Advanced features: Access to additional Vertex AI features like model tuning and monitoring
• Authentication: Uses Google Cloud Application Default Credentials (ADC) instead of API keys

Configuration:
- Set VERTEX_AI_PROJECT environment variable (required)
- Set VERTEX_AI_LOCATION environment variable (optional, defaults to us-central1)
- Use Google Cloud Application Default Credentials or service account key

Authentication Setup:
Option 1 (Recommended): gcloud auth application-default login
Option 2: Set GOOGLE_APPLICATION_CREDENTIALS to service account key path

Available Models:
- vertex_ai/gemini-2.0-flash
- vertex_ai/gemini-2.5-flash
- vertex_ai/gemini-2.5-pro""",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="groq",
            provider_type="remote::groq",
            pip_packages=[],
            module="llama_stack.providers.remote.inference.groq",
            config_class="llama_stack.providers.remote.inference.groq.GroqConfig",
            provider_data_validator="llama_stack.providers.remote.inference.groq.config.GroqProviderDataValidator",
            description="Groq inference provider for ultra-fast inference using Groq's LPU technology.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="llama-openai-compat",
            provider_type="remote::llama-openai-compat",
            pip_packages=[],
            module="llama_stack.providers.remote.inference.llama_openai_compat",
            config_class="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaCompatConfig",
            provider_data_validator="llama_stack.providers.remote.inference.llama_openai_compat.config.LlamaProviderDataValidator",
            description="Llama OpenAI-compatible provider for using Llama models with OpenAI API format.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="sambanova",
            provider_type="remote::sambanova",
            pip_packages=[],
            module="llama_stack.providers.remote.inference.sambanova",
            config_class="llama_stack.providers.remote.inference.sambanova.SambaNovaImplConfig",
            provider_data_validator="llama_stack.providers.remote.inference.sambanova.config.SambaNovaProviderDataValidator",
            description="SambaNova inference provider for running models on SambaNova's dataflow architecture.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="passthrough",
            provider_type="remote::passthrough",
            pip_packages=[],
            module="llama_stack.providers.remote.inference.passthrough",
            config_class="llama_stack.providers.remote.inference.passthrough.PassthroughImplConfig",
            provider_data_validator="llama_stack.providers.remote.inference.passthrough.PassthroughProviderDataValidator",
            description="Passthrough inference provider for connecting to any external inference service not directly supported.",
        ),
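        # Per the PR description above, WatsonX resolves its API key and project
        # ID from per-request provider data validated by the
        # WatsonXProviderDataValidator declared in the watsonx config module
        # referenced below.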
        RemoteProviderSpec(
            api=Api.inference,
            adapter_type="watsonx",
            provider_type="remote::watsonx",
            pip_packages=["litellm"],
            module="llama_stack.providers.remote.inference.watsonx",
            config_class="llama_stack.providers.remote.inference.watsonx.WatsonXConfig",
            provider_data_validator="llama_stack.providers.remote.inference.watsonx.config.WatsonXProviderDataValidator",
            description="IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform.",
        ),
        RemoteProviderSpec(
            api=Api.inference,
            provider_type="remote::azure",
            adapter_type="azure",
            pip_packages=[],
            module="llama_stack.providers.remote.inference.azure",
            config_class="llama_stack.providers.remote.inference.azure.AzureConfig",
            provider_data_validator="llama_stack.providers.remote.inference.azure.config.AzureProviderDataValidator",
            description="""
Azure OpenAI inference provider for accessing GPT models and other Azure services.
Provider documentation
https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
""",
        ),
    ]
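As a quick usage sketch, the registry can be queried for a specific provider spec. The import path is an assumption here, since only the file contents are shown above; adjust it to wherever this module actually lives.

```python
# Assumed import path for this registry module.
from llama_stack.providers.registry.inference import available_providers

# Find the WatsonX spec and inspect the fields the distribution resolver uses.
watsonx = next(p for p in available_providers() if p.provider_type == "remote::watsonx")
print(watsonx.module)                   # llama_stack.providers.remote.inference.watsonx
print(watsonx.provider_data_validator)  # ...watsonx.config.WatsonXProviderDataValidator
print(watsonx.pip_packages)             # ['litellm']
```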