From e894e36eea5c49db88a241ac95ef24f1aa7183fc Mon Sep 17 00:00:00 2001
From: Sumanth Kamenani
Date: Thu, 6 Nov 2025 20:18:18 -0500
Subject: [PATCH 1/2] feat: add OpenAI-compatible Bedrock provider (#3748)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements an AWS Bedrock inference provider using the OpenAI-compatible
endpoint for Llama models available through Bedrock.

Closes: #3410

## What does this PR do?

Adds AWS Bedrock as an inference provider using the OpenAI-compatible
endpoint. This lets us use Bedrock models (GPT-OSS, Llama) through the
standard llama-stack inference API. The implementation uses an
OpenAI-compatible client under the hood, so it gets all the OpenAI
compatibility features. The provider handles per-request API key
overrides via headers.

## Test Plan

**Tested the following scenarios:**
- Non-streaming completion - basic request/response flow
- Streaming completion - SSE streaming with chunked responses
- Multi-turn conversations - context retention across turns
- Tool calling - function calling with proper tool_calls format

# Bedrock OpenAI-Compatible Provider - Test Results

**Model:** `bedrock-inference/openai.gpt-oss-20b-1:0`

---

## Test 1: Model Listing

**Request:**
```http
GET /v1/models HTTP/1.1
```

**Response:**
```http
HTTP/1.1 200 OK
Content-Type: application/json

{
  "data": [
    {"identifier": "bedrock-inference/openai.gpt-oss-20b-1:0", ...},
    {"identifier": "bedrock-inference/openai.gpt-oss-40b-1:0", ...}
  ]
}
```

---

## Test 2: Non-Streaming Completion

**Request:**
```http
POST /v1/chat/completions HTTP/1.1
Content-Type: application/json

{
  "model": "bedrock-inference/openai.gpt-oss-20b-1:0",
  "messages": [{"role": "user", "content": "Say 'Hello from Bedrock' and nothing else"}],
  "stream": false
}
```

**Response:**
```http
HTTP/1.1 200 OK
Content-Type: application/json

{
  "choices": [{
    "finish_reason": "stop",
    "message": {"content": "...Hello from Bedrock"}
  }],
  "usage": {"prompt_tokens": 79, "completion_tokens": 50, "total_tokens": 129}
}
```

---

## Test 3: Streaming Completion

**Request:**
```http
POST /v1/chat/completions HTTP/1.1
Content-Type: application/json

{
  "model": "bedrock-inference/openai.gpt-oss-20b-1:0",
  "messages": [{"role": "user", "content": "Count from 1 to 5"}],
  "stream": true
}
```

**Response:**
```http
HTTP/1.1 200 OK
Content-Type: text/event-stream

[6 SSE chunks received]
Final content: "1, 2, 3, 4, 5"
```

---

## Test 4: Error Handling - Invalid Model

**Request:**
```http
POST /v1/chat/completions HTTP/1.1
Content-Type: application/json

{
  "model": "invalid-model-id",
  "messages": [{"role": "user", "content": "Hello"}],
  "stream": false
}
```

**Response:**
```http
HTTP/1.1 404 Not Found
Content-Type: application/json

{
  "detail": "Model 'invalid-model-id' not found. Use 'client.models.list()' to list available Models."
}
```

---

## Test 5: Multi-Turn Conversation

**Request 1:**
```http
POST /v1/chat/completions HTTP/1.1

{
  "messages": [{"role": "user", "content": "My name is Alice"}]
}
```

**Response 1:**
```http
HTTP/1.1 200 OK

{
  "choices": [{
    "message": {"content": "...Nice to meet you, Alice!
How can I help you today?"} }] } ``` **Request 2 (with history):** ```http POST /v1/chat/completions HTTP/1.1 { "messages": [ {"role": "user", "content": "My name is Alice"}, {"role": "assistant", "content": "...Nice to meet you, Alice!..."}, {"role": "user", "content": "What is my name?"} ] } ``` **Response 2:** ```http HTTP/1.1 200 OK { "choices": [{ "message": {"content": "...Your name is Alice."} }], "usage": {"prompt_tokens": 183, "completion_tokens": 42} } ``` **Context retained across turns** --- ## Test 6: System Messages **Request:** ```http POST /v1/chat/completions HTTP/1.1 { "messages": [ {"role": "system", "content": "You are Shakespeare. Respond only in Shakespearean English."}, {"role": "user", "content": "Tell me about the weather"} ] } ``` **Response:** ```http HTTP/1.1 200 OK { "choices": [{ "message": {"content": "Lo! I heed thy request..."} }], "usage": {"completion_tokens": 813} } ``` --- ## Test 7: Tool Calling **Request:** ```http POST /v1/chat/completions HTTP/1.1 { "messages": [{"role": "user", "content": "What's the weather in San Francisco?"}], "tools": [{ "type": "function", "function": { "name": "get_weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}} } }] } ``` **Response:** ```http HTTP/1.1 200 OK { "choices": [{ "finish_reason": "tool_calls", "message": { "tool_calls": [{ "function": {"name": "get_weather", "arguments": "{\"location\":\"San Francisco\"}"} }] } }] } ``` --- ## Test 8: Sampling Parameters **Request:** ```http POST /v1/chat/completions HTTP/1.1 { "messages": [{"role": "user", "content": "Say hello"}], "temperature": 0.7, "top_p": 0.9 } ``` **Response:** ```http HTTP/1.1 200 OK { "choices": [{ "message": {"content": "...Hello! 👋 How can I help you today?"} }] } ``` --- ## Test 9: Authentication Error Handling ### Subtest A: Invalid API Key **Request:** ```http POST /v1/chat/completions HTTP/1.1 x-llamastack-provider-data: {"aws_bedrock_api_key": "invalid-fake-key-12345"} {"model": "bedrock-inference/openai.gpt-oss-20b-1:0", ...} ``` **Response:** ```http HTTP/1.1 400 Bad Request { "detail": "Invalid value: Authentication failed: Error code: 401 - {'error': {'message': 'Invalid API Key format: Must start with pre-defined prefix', ...}}" } ``` --- ### Subtest B: Empty API Key (Fallback to Config) **Request:** ```http POST /v1/chat/completions HTTP/1.1 x-llamastack-provider-data: {"aws_bedrock_api_key": ""} {"model": "bedrock-inference/openai.gpt-oss-20b-1:0", ...} ``` **Response:** ```http HTTP/1.1 200 OK { "choices": [{ "message": {"content": "...Hello! 
How can I assist you today?"} }] } ``` **Fell back to config key** --- ### Subtest C: Malformed Token **Request:** ```http POST /v1/chat/completions HTTP/1.1 x-llamastack-provider-data: {"aws_bedrock_api_key": "not-a-valid-bedrock-token-format"} {"model": "bedrock-inference/openai.gpt-oss-20b-1:0", ...} ``` **Response:** ```http HTTP/1.1 400 Bad Request { "detail": "Invalid value: Authentication failed: Error code: 401 - {'error': {'message': 'Invalid API Key format: Must start with pre-defined prefix', ...}}" } ``` --- .../providers/inference/remote_bedrock.mdx | 19 +- src/llama_stack/core/routers/inference.py | 4 +- .../distributions/ci-tests/run.yaml | 3 + .../starter-gpu/run-with-postgres-store.yaml | 3 + .../distributions/starter-gpu/run.yaml | 3 + .../starter/run-with-postgres-store.yaml | 3 + .../distributions/starter/run.yaml | 3 + .../providers/registry/inference.py | 5 +- .../remote/inference/bedrock/__init__.py | 2 +- .../remote/inference/bedrock/bedrock.py | 191 ++++++++---------- .../remote/inference/bedrock/config.py | 27 ++- .../remote/inference/bedrock/models.py | 29 --- .../inference/test_bedrock_adapter.py | 78 +++++++ .../inference/test_bedrock_config.py | 39 ++++ tests/unit/providers/test_bedrock.py | 90 +++++---- 15 files changed, 309 insertions(+), 190 deletions(-) delete mode 100644 src/llama_stack/providers/remote/inference/bedrock/models.py create mode 100644 tests/unit/providers/inference/test_bedrock_adapter.py create mode 100644 tests/unit/providers/inference/test_bedrock_config.py diff --git a/docs/docs/providers/inference/remote_bedrock.mdx b/docs/docs/providers/inference/remote_bedrock.mdx index 683ec12f8..61931643e 100644 --- a/docs/docs/providers/inference/remote_bedrock.mdx +++ b/docs/docs/providers/inference/remote_bedrock.mdx @@ -1,5 +1,5 @@ --- -description: "AWS Bedrock inference provider for accessing various AI models through AWS's managed service." +description: "AWS Bedrock inference provider using OpenAI compatible endpoint." sidebar_label: Remote - Bedrock title: remote::bedrock --- @@ -8,7 +8,7 @@ title: remote::bedrock ## Description -AWS Bedrock inference provider for accessing various AI models through AWS's managed service. +AWS Bedrock inference provider using OpenAI compatible endpoint. ## Configuration @@ -16,19 +16,12 @@ AWS Bedrock inference provider for accessing various AI models through AWS's man |-------|------|----------|---------|-------------| | `allowed_models` | `list[str \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. | | `refresh_models` | `` | No | False | Whether to refresh models periodically from the provider | -| `aws_access_key_id` | `str \| None` | No | | The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID | -| `aws_secret_access_key` | `str \| None` | No | | The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY | -| `aws_session_token` | `str \| None` | No | | The AWS session token to use. 
Default use environment variable: AWS_SESSION_TOKEN | -| `region_name` | `str \| None` | No | | The default AWS Region to use, for example, us-west-1 or us-west-2.Default use environment variable: AWS_DEFAULT_REGION | -| `profile_name` | `str \| None` | No | | The profile name that contains credentials to use.Default use environment variable: AWS_PROFILE | -| `total_max_attempts` | `int \| None` | No | | An integer representing the maximum number of attempts that will be made for a single request, including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS | -| `retry_mode` | `str \| None` | No | | A string representing the type of retries Boto3 will perform.Default use environment variable: AWS_RETRY_MODE | -| `connect_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to make a connection. The default is 60 seconds. | -| `read_timeout` | `float \| None` | No | 60.0 | The time in seconds till a timeout exception is thrown when attempting to read from a connection.The default is 60 seconds. | -| `session_ttl` | `int \| None` | No | 3600 | The time in seconds till a session expires. The default is 3600 seconds (1 hour). | +| `api_key` | `pydantic.types.SecretStr \| None` | No | | Authentication credential for the provider | +| `region_name` | `` | No | us-east-2 | AWS Region for the Bedrock Runtime endpoint | ## Sample Configuration ```yaml -{} +api_key: ${env.AWS_BEDROCK_API_KEY:=} +region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} ``` diff --git a/src/llama_stack/core/routers/inference.py b/src/llama_stack/core/routers/inference.py index a4f0f4411..d6270d428 100644 --- a/src/llama_stack/core/routers/inference.py +++ b/src/llama_stack/core/routers/inference.py @@ -190,7 +190,7 @@ class InferenceRouter(Inference): response = await provider.openai_completion(params) response.model = request_model_id - if self.telemetry_enabled: + if self.telemetry_enabled and response.usage is not None: metrics = self._construct_metrics( prompt_tokens=response.usage.prompt_tokens, completion_tokens=response.usage.completion_tokens, @@ -253,7 +253,7 @@ class InferenceRouter(Inference): if self.store: asyncio.create_task(self.store.store_chat_completion(response, params.messages)) - if self.telemetry_enabled: + if self.telemetry_enabled and response.usage is not None: metrics = self._construct_metrics( prompt_tokens=response.usage.prompt_tokens, completion_tokens=response.usage.completion_tokens, diff --git a/src/llama_stack/distributions/ci-tests/run.yaml b/src/llama_stack/distributions/ci-tests/run.yaml index 702acff8e..1118d2ad1 100644 --- a/src/llama_stack/distributions/ci-tests/run.yaml +++ b/src/llama_stack/distributions/ci-tests/run.yaml @@ -46,6 +46,9 @@ providers: api_key: ${env.TOGETHER_API_KEY:=} - provider_id: bedrock provider_type: remote::bedrock + config: + api_key: ${env.AWS_BEDROCK_API_KEY:=} + region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: diff --git a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml index 6dbbc8716..1920ebd9d 100644 --- a/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter-gpu/run-with-postgres-store.yaml @@ -46,6 +46,9 @@ providers: api_key: ${env.TOGETHER_API_KEY:=} - provider_id: bedrock provider_type: remote::bedrock + config: + api_key: 
${env.AWS_BEDROCK_API_KEY:=} + region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: diff --git a/src/llama_stack/distributions/starter-gpu/run.yaml b/src/llama_stack/distributions/starter-gpu/run.yaml index 807f0d678..7149b8659 100644 --- a/src/llama_stack/distributions/starter-gpu/run.yaml +++ b/src/llama_stack/distributions/starter-gpu/run.yaml @@ -46,6 +46,9 @@ providers: api_key: ${env.TOGETHER_API_KEY:=} - provider_id: bedrock provider_type: remote::bedrock + config: + api_key: ${env.AWS_BEDROCK_API_KEY:=} + region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: diff --git a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml index 530084bd9..702f95381 100644 --- a/src/llama_stack/distributions/starter/run-with-postgres-store.yaml +++ b/src/llama_stack/distributions/starter/run-with-postgres-store.yaml @@ -46,6 +46,9 @@ providers: api_key: ${env.TOGETHER_API_KEY:=} - provider_id: bedrock provider_type: remote::bedrock + config: + api_key: ${env.AWS_BEDROCK_API_KEY:=} + region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: diff --git a/src/llama_stack/distributions/starter/run.yaml b/src/llama_stack/distributions/starter/run.yaml index eb4652af0..0ce392810 100644 --- a/src/llama_stack/distributions/starter/run.yaml +++ b/src/llama_stack/distributions/starter/run.yaml @@ -46,6 +46,9 @@ providers: api_key: ${env.TOGETHER_API_KEY:=} - provider_id: bedrock provider_type: remote::bedrock + config: + api_key: ${env.AWS_BEDROCK_API_KEY:=} + region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} - provider_id: ${env.NVIDIA_API_KEY:+nvidia} provider_type: remote::nvidia config: diff --git a/src/llama_stack/providers/registry/inference.py b/src/llama_stack/providers/registry/inference.py index 00967a8ec..1b70182fc 100644 --- a/src/llama_stack/providers/registry/inference.py +++ b/src/llama_stack/providers/registry/inference.py @@ -138,10 +138,11 @@ def available_providers() -> list[ProviderSpec]: api=Api.inference, adapter_type="bedrock", provider_type="remote::bedrock", - pip_packages=["boto3"], + pip_packages=[], module="llama_stack.providers.remote.inference.bedrock", config_class="llama_stack.providers.remote.inference.bedrock.BedrockConfig", - description="AWS Bedrock inference provider for accessing various AI models through AWS's managed service.", + provider_data_validator="llama_stack.providers.remote.inference.bedrock.config.BedrockProviderDataValidator", + description="AWS Bedrock inference provider using OpenAI compatible endpoint.", ), RemoteProviderSpec( api=Api.inference, diff --git a/src/llama_stack/providers/remote/inference/bedrock/__init__.py b/src/llama_stack/providers/remote/inference/bedrock/__init__.py index 4d98f4999..4b0686b18 100644 --- a/src/llama_stack/providers/remote/inference/bedrock/__init__.py +++ b/src/llama_stack/providers/remote/inference/bedrock/__init__.py @@ -11,7 +11,7 @@ async def get_adapter_impl(config: BedrockConfig, _deps): assert isinstance(config, BedrockConfig), f"Unexpected config type: {type(config)}" - impl = BedrockInferenceAdapter(config) + impl = BedrockInferenceAdapter(config=config) await impl.initialize() diff --git a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py 
b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py index d266f9e6f..1bf44b51a 100644 --- a/src/llama_stack/providers/remote/inference/bedrock/bedrock.py +++ b/src/llama_stack/providers/remote/inference/bedrock/bedrock.py @@ -4,139 +4,124 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -import json -from collections.abc import AsyncIterator +from collections.abc import AsyncIterator, Iterable -from botocore.client import BaseClient +from openai import AuthenticationError from llama_stack.apis.inference import ( - ChatCompletionRequest, - Inference, + OpenAIChatCompletion, + OpenAIChatCompletionChunk, OpenAIChatCompletionRequestWithExtraBody, + OpenAICompletion, OpenAICompletionRequestWithExtraBody, OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsResponse, ) -from llama_stack.apis.inference.inference import ( - OpenAIChatCompletion, - OpenAIChatCompletionChunk, - OpenAICompletion, -) -from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig -from llama_stack.providers.utils.bedrock.client import create_bedrock_client -from llama_stack.providers.utils.inference.model_registry import ( - ModelRegistryHelper, -) -from llama_stack.providers.utils.inference.openai_compat import ( - get_sampling_strategy_options, -) -from llama_stack.providers.utils.inference.prompt_adapter import ( - chat_completion_request_to_prompt, -) +from llama_stack.core.telemetry.tracing import get_current_span +from llama_stack.log import get_logger +from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin -from .models import MODEL_ENTRIES +from .config import BedrockConfig -REGION_PREFIX_MAP = { - "us": "us.", - "eu": "eu.", - "ap": "ap.", -} +logger = get_logger(name=__name__, category="inference::bedrock") -def _get_region_prefix(region: str | None) -> str: - # AWS requires region prefixes for inference profiles - if region is None: - return "us." # default to US when we don't know +class BedrockInferenceAdapter(OpenAIMixin): + """ + Adapter for AWS Bedrock's OpenAI-compatible API endpoints. - # Handle case insensitive region matching - region_lower = region.lower() - for prefix in REGION_PREFIX_MAP: - if region_lower.startswith(f"{prefix}-"): - return REGION_PREFIX_MAP[prefix] + Supports Llama models across regions and GPT-OSS models (us-west-2 only). - # Fallback to US for anything we don't recognize - return "us." + Note: Bedrock's OpenAI-compatible endpoint does not support /v1/models + for dynamic model discovery. Models must be pre-registered in the config. + """ + config: BedrockConfig + provider_data_api_key_field: str = "aws_bedrock_api_key" -def _to_inference_profile_id(model_id: str, region: str = None) -> str: - # Return ARNs unchanged - if model_id.startswith("arn:"): - return model_id + def get_base_url(self) -> str: + """Get base URL for OpenAI client.""" + return f"https://bedrock-runtime.{self.config.region_name}.amazonaws.com/openai/v1" - # Return inference profile IDs that already have regional prefixes - if any(model_id.startswith(p) for p in REGION_PREFIX_MAP.values()): - return model_id + async def list_provider_model_ids(self) -> Iterable[str]: + """ + Bedrock's OpenAI-compatible endpoint does not support the /v1/models endpoint. + Returns empty list since models must be pre-registered in the config. 
+ """ + return [] - # Default to US East when no region is provided - if region is None: - region = "us-east-1" - - return _get_region_prefix(region) + model_id - - -class BedrockInferenceAdapter( - ModelRegistryHelper, - Inference, -): - def __init__(self, config: BedrockConfig) -> None: - ModelRegistryHelper.__init__(self, model_entries=MODEL_ENTRIES) - self._config = config - self._client = None - - @property - def client(self) -> BaseClient: - if self._client is None: - self._client = create_bedrock_client(self._config) - return self._client - - async def initialize(self) -> None: - pass - - async def shutdown(self) -> None: - if self._client is not None: - self._client.close() - - async def _get_params_for_chat_completion(self, request: ChatCompletionRequest) -> dict: - bedrock_model = request.model - - sampling_params = request.sampling_params - options = get_sampling_strategy_options(sampling_params) - - if sampling_params.max_tokens: - options["max_gen_len"] = sampling_params.max_tokens - if sampling_params.repetition_penalty > 0: - options["repetition_penalty"] = sampling_params.repetition_penalty - - prompt = await chat_completion_request_to_prompt(request, self.get_llama_model(request.model)) - - # Convert foundation model ID to inference profile ID - region_name = self.client.meta.region_name - inference_profile_id = _to_inference_profile_id(bedrock_model, region_name) - - return { - "modelId": inference_profile_id, - "body": json.dumps( - { - "prompt": prompt, - **options, - } - ), - } + async def check_model_availability(self, model: str) -> bool: + """ + Bedrock doesn't support dynamic model listing via /v1/models. + Always return True to accept all models registered in the config. + """ + return True async def openai_embeddings( self, params: OpenAIEmbeddingsRequestWithExtraBody, ) -> OpenAIEmbeddingsResponse: - raise NotImplementedError() + """Bedrock's OpenAI-compatible API does not support the /v1/embeddings endpoint.""" + raise NotImplementedError( + "Bedrock's OpenAI-compatible API does not support /v1/embeddings endpoint. " + "See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-chat-completions.html" + ) async def openai_completion( self, params: OpenAICompletionRequestWithExtraBody, ) -> OpenAICompletion: - raise NotImplementedError("OpenAI completion not supported by the Bedrock provider") + """Bedrock's OpenAI-compatible API does not support the /v1/completions endpoint.""" + raise NotImplementedError( + "Bedrock's OpenAI-compatible API does not support /v1/completions endpoint. " + "Only /v1/chat/completions is supported. 
" + "See https://docs.aws.amazon.com/bedrock/latest/userguide/inference-chat-completions.html" + ) async def openai_chat_completion( self, params: OpenAIChatCompletionRequestWithExtraBody, ) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]: - raise NotImplementedError("OpenAI chat completion not supported by the Bedrock provider") + """Override to enable streaming usage metrics and handle authentication errors.""" + # Enable streaming usage metrics when telemetry is active + if params.stream and get_current_span() is not None: + if params.stream_options is None: + params.stream_options = {"include_usage": True} + elif "include_usage" not in params.stream_options: + params.stream_options = {**params.stream_options, "include_usage": True} + + try: + logger.debug(f"Calling Bedrock OpenAI API with model={params.model}, stream={params.stream}") + result = await super().openai_chat_completion(params=params) + logger.debug(f"Bedrock API returned: {type(result).__name__ if result is not None else 'None'}") + + if result is None: + logger.error(f"Bedrock OpenAI client returned None for model={params.model}, stream={params.stream}") + raise RuntimeError( + f"Bedrock API returned no response for model '{params.model}'. " + "This may indicate the model is not supported or a network/API issue occurred." + ) + + return result + except AuthenticationError as e: + error_msg = str(e) + + # Check if this is a token expiration error + if "expired" in error_msg.lower() or "Bearer Token has expired" in error_msg: + logger.error(f"AWS Bedrock authentication token expired: {error_msg}") + raise ValueError( + "AWS Bedrock authentication failed: Bearer token has expired. " + "The AWS_BEDROCK_API_KEY environment variable contains an expired pre-signed URL. " + "Please refresh your token by generating a new pre-signed URL with AWS credentials. " + "Refer to AWS Bedrock documentation for details on OpenAI-compatible endpoints." + ) from e + else: + logger.error(f"AWS Bedrock authentication failed: {error_msg}") + raise ValueError( + f"AWS Bedrock authentication failed: {error_msg}. " + "Please verify your API key is correct in the provider config or x-llamastack-provider-data header. " + "The API key should be a valid AWS pre-signed URL for Bedrock's OpenAI-compatible endpoint." + ) from e + except Exception as e: + logger.error(f"Unexpected error calling Bedrock API: {type(e).__name__}: {e}", exc_info=True) + raise diff --git a/src/llama_stack/providers/remote/inference/bedrock/config.py b/src/llama_stack/providers/remote/inference/bedrock/config.py index 5961a2f15..631a6e7ef 100644 --- a/src/llama_stack/providers/remote/inference/bedrock/config.py +++ b/src/llama_stack/providers/remote/inference/bedrock/config.py @@ -4,8 +4,29 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. 
-from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig +import os + +from pydantic import BaseModel, Field + +from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig -class BedrockConfig(BedrockBaseConfig): - pass +class BedrockProviderDataValidator(BaseModel): + aws_bedrock_api_key: str | None = Field( + default=None, + description="API key for Amazon Bedrock", + ) + + +class BedrockConfig(RemoteInferenceProviderConfig): + region_name: str = Field( + default_factory=lambda: os.getenv("AWS_DEFAULT_REGION", "us-east-2"), + description="AWS Region for the Bedrock Runtime endpoint", + ) + + @classmethod + def sample_run_config(cls, **kwargs): + return { + "api_key": "${env.AWS_BEDROCK_API_KEY:=}", + "region_name": "${env.AWS_DEFAULT_REGION:=us-east-2}", + } diff --git a/src/llama_stack/providers/remote/inference/bedrock/models.py b/src/llama_stack/providers/remote/inference/bedrock/models.py deleted file mode 100644 index 17273c122..000000000 --- a/src/llama_stack/providers/remote/inference/bedrock/models.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# All rights reserved. -# -# This source code is licensed under the terms described in the LICENSE file in -# the root directory of this source tree. - -from llama_stack.models.llama.sku_types import CoreModelId -from llama_stack.providers.utils.inference.model_registry import ( - build_hf_repo_model_entry, -) - -SAFETY_MODELS_ENTRIES = [] - - -# https://docs.aws.amazon.com/bedrock/latest/userguide/models-supported.html -MODEL_ENTRIES = [ - build_hf_repo_model_entry( - "meta.llama3-1-8b-instruct-v1:0", - CoreModelId.llama3_1_8b_instruct.value, - ), - build_hf_repo_model_entry( - "meta.llama3-1-70b-instruct-v1:0", - CoreModelId.llama3_1_70b_instruct.value, - ), - build_hf_repo_model_entry( - "meta.llama3-1-405b-instruct-v1:0", - CoreModelId.llama3_1_405b_instruct.value, - ), -] + SAFETY_MODELS_ENTRIES diff --git a/tests/unit/providers/inference/test_bedrock_adapter.py b/tests/unit/providers/inference/test_bedrock_adapter.py new file mode 100644 index 000000000..fdd07c032 --- /dev/null +++ b/tests/unit/providers/inference/test_bedrock_adapter.py @@ -0,0 +1,78 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock + +import pytest +from openai import AuthenticationError + +from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody +from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter +from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + +def test_adapter_initialization(): + config = BedrockConfig(api_key="test-key", region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + + assert adapter.config.auth_credential.get_secret_value() == "test-key" + assert adapter.config.region_name == "us-east-1" + + +def test_client_url_construction(): + config = BedrockConfig(api_key="test-key", region_name="us-west-2") + adapter = BedrockInferenceAdapter(config=config) + + assert adapter.get_base_url() == "https://bedrock-runtime.us-west-2.amazonaws.com/openai/v1" + + +def test_api_key_from_config(): + config = BedrockConfig(api_key="config-key", region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + assert adapter.config.auth_credential.get_secret_value() == "config-key" + + +def test_api_key_from_header_overrides_config(): + """Test API key from request header overrides config via client property""" + config = BedrockConfig(api_key="config-key", region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + adapter.provider_data_api_key_field = "aws_bedrock_api_key" + adapter.get_request_provider_data = MagicMock(return_value=SimpleNamespace(aws_bedrock_api_key="header-key")) + + # The client property is where header override happens (in OpenAIMixin) + assert adapter.client.api_key == "header-key" + + +async def test_authentication_error_handling(): + """Test that AuthenticationError from OpenAI client is converted to ValueError with helpful message""" + config = BedrockConfig(api_key="invalid-key", region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + + # Mock the parent class method to raise AuthenticationError + mock_response = MagicMock() + mock_response.message = "Invalid authentication credentials" + auth_error = AuthenticationError(message="Invalid authentication credentials", response=mock_response, body=None) + + # Create a mock that raises the error + mock_super = AsyncMock(side_effect=auth_error) + + # Patch the parent class method + original_method = BedrockInferenceAdapter.__bases__[0].openai_chat_completion + BedrockInferenceAdapter.__bases__[0].openai_chat_completion = mock_super + + try: + with pytest.raises(ValueError) as exc_info: + params = OpenAIChatCompletionRequestWithExtraBody( + model="test-model", messages=[{"role": "user", "content": "test"}] + ) + await adapter.openai_chat_completion(params=params) + + assert "AWS Bedrock authentication failed" in str(exc_info.value) + assert "Please verify your API key" in str(exc_info.value) + finally: + # Restore original method + BedrockInferenceAdapter.__bases__[0].openai_chat_completion = original_method diff --git a/tests/unit/providers/inference/test_bedrock_config.py b/tests/unit/providers/inference/test_bedrock_config.py new file mode 100644 index 000000000..4c1fd56a2 --- /dev/null +++ b/tests/unit/providers/inference/test_bedrock_config.py @@ -0,0 +1,39 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. 
+ +from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig + + +def test_bedrock_config_defaults_no_env(monkeypatch): + """Test BedrockConfig defaults when env vars are not set""" + monkeypatch.delenv("AWS_BEDROCK_API_KEY", raising=False) + monkeypatch.delenv("AWS_DEFAULT_REGION", raising=False) + config = BedrockConfig() + assert config.auth_credential is None + assert config.region_name == "us-east-2" + + +def test_bedrock_config_reads_from_env(monkeypatch): + """Test BedrockConfig field initialization reads from environment variables""" + monkeypatch.setenv("AWS_DEFAULT_REGION", "eu-west-1") + config = BedrockConfig() + assert config.region_name == "eu-west-1" + + +def test_bedrock_config_with_values(): + """Test BedrockConfig accepts explicit values via alias""" + config = BedrockConfig(api_key="test-key", region_name="us-west-2") + assert config.auth_credential.get_secret_value() == "test-key" + assert config.region_name == "us-west-2" + + +def test_bedrock_config_sample(): + """Test BedrockConfig sample_run_config returns correct format""" + sample = BedrockConfig.sample_run_config() + assert "api_key" in sample + assert "region_name" in sample + assert sample["api_key"] == "${env.AWS_BEDROCK_API_KEY:=}" + assert sample["region_name"] == "${env.AWS_DEFAULT_REGION:=us-east-2}" diff --git a/tests/unit/providers/test_bedrock.py b/tests/unit/providers/test_bedrock.py index 1ff07bbbe..684fcf262 100644 --- a/tests/unit/providers/test_bedrock.py +++ b/tests/unit/providers/test_bedrock.py @@ -4,50 +4,66 @@ # This source code is licensed under the terms described in the LICENSE file in # the root directory of this source tree. -from llama_stack.providers.remote.inference.bedrock.bedrock import ( - _get_region_prefix, - _to_inference_profile_id, -) +from types import SimpleNamespace +from unittest.mock import AsyncMock, PropertyMock, patch + +from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody +from llama_stack.providers.remote.inference.bedrock.bedrock import BedrockInferenceAdapter +from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig -def test_region_prefixes(): - assert _get_region_prefix("us-east-1") == "us." - assert _get_region_prefix("eu-west-1") == "eu." - assert _get_region_prefix("ap-south-1") == "ap." - assert _get_region_prefix("ca-central-1") == "us." +def test_can_create_adapter(): + config = BedrockConfig(api_key="test-key", region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) - # Test case insensitive - assert _get_region_prefix("US-EAST-1") == "us." - assert _get_region_prefix("EU-WEST-1") == "eu." - assert _get_region_prefix("Ap-South-1") == "ap." - - # Test None region - assert _get_region_prefix(None) == "us." 
+ assert adapter is not None + assert adapter.config.region_name == "us-east-1" + assert adapter.get_api_key() == "test-key" -def test_model_id_conversion(): - # Basic conversion - assert ( - _to_inference_profile_id("meta.llama3-1-70b-instruct-v1:0", "us-east-1") == "us.meta.llama3-1-70b-instruct-v1:0" +def test_different_aws_regions(): + # just check a couple regions to verify URL construction works + config = BedrockConfig(api_key="key", region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + assert adapter.get_base_url() == "https://bedrock-runtime.us-east-1.amazonaws.com/openai/v1" + + config = BedrockConfig(api_key="key", region_name="eu-west-1") + adapter = BedrockInferenceAdapter(config=config) + assert adapter.get_base_url() == "https://bedrock-runtime.eu-west-1.amazonaws.com/openai/v1" + + +async def test_basic_chat_completion(): + """Test basic chat completion works with OpenAIMixin""" + config = BedrockConfig(api_key="k", region_name="us-east-1") + adapter = BedrockInferenceAdapter(config=config) + + class FakeModelStore: + async def has_model(self, model_id): + return True + + async def get_model(self, model_id): + return SimpleNamespace(provider_resource_id="meta.llama3-1-8b-instruct-v1:0") + + adapter.model_store = FakeModelStore() + + fake_response = SimpleNamespace( + id="chatcmpl-123", + choices=[SimpleNamespace(message=SimpleNamespace(content="Hello!", role="assistant"), finish_reason="stop")], ) - # Already has prefix - assert ( - _to_inference_profile_id("us.meta.llama3-1-70b-instruct-v1:0", "us-east-1") - == "us.meta.llama3-1-70b-instruct-v1:0" - ) + mock_create = AsyncMock(return_value=fake_response) - # ARN should be returned unchanged - arn = "arn:aws:bedrock:us-east-1:123456789012:inference-profile/us.meta.llama3-1-70b-instruct-v1:0" - assert _to_inference_profile_id(arn, "us-east-1") == arn + class FakeClient: + def __init__(self): + self.chat = SimpleNamespace(completions=SimpleNamespace(create=mock_create)) - # ARN should be returned unchanged even without region - assert _to_inference_profile_id(arn) == arn + with patch.object(type(adapter), "client", new_callable=PropertyMock, return_value=FakeClient()): + params = OpenAIChatCompletionRequestWithExtraBody( + model="llama3-1-8b", + messages=[{"role": "user", "content": "hello"}], + stream=False, + ) + response = await adapter.openai_chat_completion(params=params) - # Optional region parameter defaults to us-east-1 - assert _to_inference_profile_id("meta.llama3-1-70b-instruct-v1:0") == "us.meta.llama3-1-70b-instruct-v1:0" - - # Different regions work with optional parameter - assert ( - _to_inference_profile_id("meta.llama3-1-70b-instruct-v1:0", "eu-west-1") == "eu.meta.llama3-1-70b-instruct-v1:0" - ) + assert response.id == "chatcmpl-123" + assert mock_create.await_count == 1 From 8e457f1cec704be5c37bd5e9856613dbf263a396 Mon Sep 17 00:00:00 2001 From: Eric Huang Date: Thu, 6 Nov 2025 22:37:27 -0800 Subject: [PATCH 2/2] chore(ui): add npm package and dockerfile # What does this PR do? ## Test Plan # What does this PR do? 
## Test Plan

---
 docs/docs/distributions/index.mdx          |   1 +
 docs/docs/distributions/llama_stack_ui.mdx | 109 +++++++++++++++++++++
 docs/sidebars.ts                           |   1 +
 src/llama_stack_ui/.dockerignore           |  20 ++++
 src/llama_stack_ui/Dockerfile              |  18 ++++
 src/llama_stack_ui/bin/cli.js              |  34 +++++++
 src/llama_stack_ui/next.config.ts          |   8 +-
 src/llama_stack_ui/package-lock.json       |  16 +--
 src/llama_stack_ui/package.json            |  30 +++++-
 src/llama_stack_ui/scripts/postbuild.js    |  40 ++++++++
 10 files changed, 264 insertions(+), 13 deletions(-)
 create mode 100644 docs/docs/distributions/llama_stack_ui.mdx
 create mode 100644 src/llama_stack_ui/.dockerignore
 create mode 100644 src/llama_stack_ui/Dockerfile
 create mode 100755 src/llama_stack_ui/bin/cli.js
 create mode 100644 src/llama_stack_ui/scripts/postbuild.js

diff --git a/docs/docs/distributions/index.mdx b/docs/docs/distributions/index.mdx
index 0149f143f..ebf4bd6ce 100644
--- a/docs/docs/distributions/index.mdx
+++ b/docs/docs/distributions/index.mdx
@@ -19,3 +19,4 @@ This section provides an overview of the distributions available in Llama Stack.
 - **[Starting Llama Stack Server](./starting_llama_stack_server.mdx)** - How to run distributions
 - **[Importing as Library](./importing_as_library.mdx)** - Use distributions in your code
 - **[Configuration Reference](./configuration.mdx)** - Configuration file format details
+- **[Llama Stack UI](./llama_stack_ui.mdx)** - Web-based user interface for interacting with Llama Stack servers
diff --git a/docs/docs/distributions/llama_stack_ui.mdx b/docs/docs/distributions/llama_stack_ui.mdx
new file mode 100644
index 000000000..7ba47ea4d
--- /dev/null
+++ b/docs/docs/distributions/llama_stack_ui.mdx
@@ -0,0 +1,109 @@
+---
+title: Llama Stack UI
+description: Web-based user interface for interacting with Llama Stack servers
+sidebar_label: Llama Stack UI
+sidebar_position: 8
+---
+
+# Llama Stack UI
+
+The Llama Stack UI is a web-based interface for interacting with Llama Stack servers. Built with Next.js and React, it provides a visual way to work with agents, manage resources, and view logs.
+
+## Features
+
+- **Logs & Monitoring**: View chat completions, agent responses, and vector store activity
+- **Vector Stores**: Create and manage vector databases for RAG (Retrieval-Augmented Generation) workflows
+- **Prompt Management**: Create and manage reusable prompts
+
+## Prerequisites
+
+You need a running Llama Stack server. The UI is a client that connects to the Llama Stack backend.
+
+If you don't have a Llama Stack server running yet, see the [Starting Llama Stack Server](../getting_started/starting_llama_stack_server.mdx) guide.
+
+## Running the UI
+
+### Option 1: Using npx (Recommended for Quick Start)
+
+The fastest way to get started is using `npx`:
+
+```bash
+npx llama-stack-ui
+```
+
+This will start the UI server on `http://localhost:8322` (default port).
+
+### Option 2: Using Docker
+
+Run the UI in a container:
+
+```bash
+docker run -p 8322:8322 llamastack/ui
+```
+
+Access the UI at `http://localhost:8322`.
+
+## Environment Variables
+
+The UI can be configured using the following environment variables:
+
+| Variable | Description | Default |
+|----------|-------------|---------|
+| `LLAMA_STACK_BACKEND_URL` | URL of your Llama Stack server | `http://localhost:8321` |
+| `LLAMA_STACK_UI_PORT` | Port for the UI server | `8322` |
+
+If the Llama Stack server is running with authentication enabled, you can configure the UI to use it by setting the following environment variables:
+
+| Variable | Description | Default |
+|----------|-------------|---------|
+| `NEXTAUTH_URL` | NextAuth URL for authentication | `http://localhost:8322` |
+| `GITHUB_CLIENT_ID` | GitHub OAuth client ID (optional, for authentication) | - |
+| `GITHUB_CLIENT_SECRET` | GitHub OAuth client secret (optional, for authentication) | - |
+
+### Setting Environment Variables
+
+#### For npx:
+
+```bash
+LLAMA_STACK_BACKEND_URL=http://localhost:8321 \
+LLAMA_STACK_UI_PORT=8080 \
+npx llama-stack-ui
+```
+
+#### For Docker:
+
+```bash
+docker run -p 8080:8080 \
+  -e LLAMA_STACK_BACKEND_URL=http://localhost:8321 \
+  -e LLAMA_STACK_UI_PORT=8080 \
+  llamastack/ui
+```
+
+## Using the UI
+
+### Managing Resources
+
+- **Vector Stores**: Create vector databases for RAG workflows, view stored documents and embeddings
+- **Prompts**: Create and manage reusable prompt templates
+- **Chat Completions**: View history of chat interactions
+- **Responses**: Browse detailed agent responses and tool calls
+
+## Development
+
+If you want to run the UI from source for development:

+```bash
+# From the project root
+cd src/llama_stack_ui
+
+# Install dependencies
+npm install
+
+# Set environment variables
+export LLAMA_STACK_BACKEND_URL=http://localhost:8321
+
+# Start the development server
+npm run dev
+```
+
+The development server will start on `http://localhost:8322` with hot reloading enabled.
diff --git a/docs/sidebars.ts b/docs/sidebars.ts index 641c2eed3..7b4ac5ac8 100644 --- a/docs/sidebars.ts +++ b/docs/sidebars.ts @@ -57,6 +57,7 @@ const sidebars: SidebarsConfig = { 'distributions/importing_as_library', 'distributions/configuration', 'distributions/starting_llama_stack_server', + 'distributions/llama_stack_ui', { type: 'category', label: 'Self-Hosted Distributions', diff --git a/src/llama_stack_ui/.dockerignore b/src/llama_stack_ui/.dockerignore new file mode 100644 index 000000000..e3d1daae6 --- /dev/null +++ b/src/llama_stack_ui/.dockerignore @@ -0,0 +1,20 @@ +.git +.gitignore +.env.local +.env.*.local +.next +node_modules +npm-debug.log +*.md +.DS_Store +.vscode +.idea +playwright-report +e2e +jest.config.ts +jest.setup.ts +eslint.config.mjs +.prettierrc +.prettierignore +.nvmrc +playwright.config.ts diff --git a/src/llama_stack_ui/Dockerfile b/src/llama_stack_ui/Dockerfile new file mode 100644 index 000000000..6aea3dbfd --- /dev/null +++ b/src/llama_stack_ui/Dockerfile @@ -0,0 +1,18 @@ +FROM node:22.5.1-alpine + +ENV NODE_ENV=production + +# Install dumb-init for proper signal handling +RUN apk add --no-cache dumb-init + +# Create non-root user for security +RUN addgroup --system --gid 1001 nodejs +RUN adduser --system --uid 1001 nextjs + +# Install llama-stack-ui from npm +RUN npm install -g llama-stack-ui + +USER nextjs + +ENTRYPOINT ["dumb-init", "--"] +CMD ["llama-stack-ui"] diff --git a/src/llama_stack_ui/bin/cli.js b/src/llama_stack_ui/bin/cli.js new file mode 100755 index 000000000..6069d2f22 --- /dev/null +++ b/src/llama_stack_ui/bin/cli.js @@ -0,0 +1,34 @@ +#!/usr/bin/env node + +const { spawn } = require('child_process'); +const path = require('path'); + +const port = process.env.LLAMA_STACK_UI_PORT || 8322; +const uiDir = path.resolve(__dirname, '..'); +const serverPath = path.join(uiDir, '.next', 'standalone', 'ui', 'src', 'llama_stack_ui', 'server.js'); +const serverDir = path.dirname(serverPath); + +console.log(`Starting Llama Stack UI on http://localhost:${port}`); + +const child = spawn(process.execPath, [serverPath], { + cwd: serverDir, + stdio: 'inherit', + env: { + ...process.env, + PORT: port, + }, +}); + +process.on('SIGINT', () => { + child.kill('SIGINT'); + process.exit(0); +}); + +process.on('SIGTERM', () => { + child.kill('SIGTERM'); + process.exit(0); +}); + +child.on('exit', (code) => { + process.exit(code); +}); diff --git a/src/llama_stack_ui/next.config.ts b/src/llama_stack_ui/next.config.ts index e9ffa3083..9f4a74eca 100644 --- a/src/llama_stack_ui/next.config.ts +++ b/src/llama_stack_ui/next.config.ts @@ -1,7 +1,13 @@ import type { NextConfig } from "next"; const nextConfig: NextConfig = { - /* config options here */ + typescript: { + ignoreBuildErrors: true, + }, + output: "standalone", + images: { + unoptimized: true, + }, }; export default nextConfig; diff --git a/src/llama_stack_ui/package-lock.json b/src/llama_stack_ui/package-lock.json index 14e34b720..aa8b2ac26 100644 --- a/src/llama_stack_ui/package-lock.json +++ b/src/llama_stack_ui/package-lock.json @@ -1,12 +1,13 @@ { - "name": "ui", - "version": "0.1.0", + "name": "llama-stack-ui", + "version": "0.4.0-alpha.1", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "ui", - "version": "0.1.0", + "name": "llama-stack-ui", + "version": "0.4.0-alpha.1", + "license": "MIT", "dependencies": { "@radix-ui/react-collapsible": "^1.1.12", "@radix-ui/react-dialog": "^1.1.15", @@ -20,7 +21,7 @@ "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "framer-motion": 
"^12.23.24", - "llama-stack-client": "github:llamastack/llama-stack-client-typescript", + "llama-stack-client": "^0.3.1", "lucide-react": "^0.545.0", "next": "15.5.4", "next-auth": "^4.24.11", @@ -9684,8 +9685,9 @@ "license": "MIT" }, "node_modules/llama-stack-client": { - "version": "0.4.0-alpha.1", - "resolved": "git+ssh://git@github.com/llamastack/llama-stack-client-typescript.git#78de4862c4b7d77939ac210fa9f9bde77a2c5c5f", + "version": "0.3.1", + "resolved": "https://registry.npmjs.org/llama-stack-client/-/llama-stack-client-0.3.1.tgz", + "integrity": "sha512-4aYoF2aAQiBSfxyZEtczeQmJn8q9T22ePDqGhR+ej5RG6a8wvl5B3v7ZoKuFkft+vcP/kbJ58GQZEPLekxekZA==", "license": "MIT", "dependencies": { "@types/node": "^18.11.18", diff --git a/src/llama_stack_ui/package.json b/src/llama_stack_ui/package.json index fb7dbee75..41afc9a11 100644 --- a/src/llama_stack_ui/package.json +++ b/src/llama_stack_ui/package.json @@ -1,11 +1,31 @@ { - "name": "ui", - "version": "0.1.0", - "private": true, + "name": "llama-stack-ui", + "version": "0.4.0-alpha.4", + "description": "Web UI for Llama Stack", + "license": "MIT", + "author": "Llama Stack ", + "repository": { + "type": "git", + "url": "https://github.com/llamastack/llama-stack.git", + "directory": "llama_stack_ui" + }, + "bin": { + "llama-stack-ui": "bin/cli.js" + }, + "files": [ + "bin", + ".next", + "public", + "next.config.ts", + "instrumentation.ts", + "tsconfig.json", + "package.json" + ], "scripts": { "dev": "next dev --turbopack --port ${LLAMA_STACK_UI_PORT:-8322}", - "build": "next build", + "build": "next build && node scripts/postbuild.js", "start": "next start", + "prepublishOnly": "npm run build", "lint": "next lint", "format": "prettier --write \"./**/*.{ts,tsx}\"", "format:check": "prettier --check \"./**/*.{ts,tsx}\"", @@ -25,7 +45,7 @@ "class-variance-authority": "^0.7.1", "clsx": "^2.1.1", "framer-motion": "^12.23.24", - "llama-stack-client": "github:llamastack/llama-stack-client-typescript", + "llama-stack-client": "^0.3.1", "lucide-react": "^0.545.0", "next": "15.5.4", "next-auth": "^4.24.11", diff --git a/src/llama_stack_ui/scripts/postbuild.js b/src/llama_stack_ui/scripts/postbuild.js new file mode 100644 index 000000000..4b4dbdf5d --- /dev/null +++ b/src/llama_stack_ui/scripts/postbuild.js @@ -0,0 +1,40 @@ +const fs = require('fs'); +const path = require('path'); + +// Copy public directory to standalone +const publicSrc = path.join(__dirname, '..', 'public'); +const publicDest = path.join(__dirname, '..', '.next', 'standalone', 'ui', 'src', 'llama_stack_ui', 'public'); + +if (fs.existsSync(publicSrc) && !fs.existsSync(publicDest)) { + console.log('Copying public directory to standalone...'); + copyDir(publicSrc, publicDest); +} + +// Copy .next/static to standalone +const staticSrc = path.join(__dirname, '..', '.next', 'static'); +const staticDest = path.join(__dirname, '..', '.next', 'standalone', 'ui', 'src', 'llama_stack_ui', '.next', 'static'); + +if (fs.existsSync(staticSrc) && !fs.existsSync(staticDest)) { + console.log('Copying .next/static to standalone...'); + copyDir(staticSrc, staticDest); +} + +function copyDir(src, dest) { + if (!fs.existsSync(dest)) { + fs.mkdirSync(dest, { recursive: true }); + } + + const files = fs.readdirSync(src); + files.forEach((file) => { + const srcFile = path.join(src, file); + const destFile = path.join(dest, file); + + if (fs.statSync(srcFile).isDirectory()) { + copyDir(srcFile, destFile); + } else { + fs.copyFileSync(srcFile, destFile); + } + }); +} + +console.log('Postbuild complete!');