fix: AWS Bedrock inference profile ID conversion for region-specific endpoints (#3386)

Fixes #3370 AWS switched to requiring region-prefixed inference profile IDs instead of foundation model IDs for on-demand throughput. This was causing ValidationException errors. Added auto-detection based on boto3 client region to convert model IDs like meta.llama3-1-70b-instruct-v1:0 to us.meta.llama3-1-70b-instruct-v1:0 depending on the detected region. Also handles edge cases like ARNs, case insensitive regions, and None regions. Tested with this request. ```json { "model_id": "meta.llama3-1-8b-instruct-v1:0", "messages": [ { "role": "system", "content": "You are a helpful assistant." }, { "role": "user", "content": "tell me a riddle" } ], "sampling_params": { "strategy": { "type": "top_p", "temperature": 0.7, "top_p": 0.9 }, "max_tokens": 512 } } ``` <img width="1488" height="878" alt="image" src="https://github.com/user-attachments/assets/0d61beec-3869-4a31-8f37-9f554c280b88" />
2025-12-03 09:53:45 +00:00 · 2025-09-11 05:41:53 -04:00 · 2025-09-11 05:41:53 -04:00 · 2838d5a20f
commit 2838d5a20f
parent 8e05c68d15
2 changed files with 102 additions and 2 deletions
--- a/llama_stack/providers/remote/inference/bedrock/bedrock.py
+++ b/llama_stack/providers/remote/inference/bedrock/bedrock.py
@ -53,6 +53,43 @@ from llama_stack.providers.utils.inference.prompt_adapter import (

 from .models import MODEL_ENTRIES

+REGION_PREFIX_MAP = {
+    "us": "us.",
+    "eu": "eu.",
+    "ap": "ap.",
+}
+
+
+def _get_region_prefix(region: str | None) -> str:
+    # AWS requires region prefixes for inference profiles
+    if region is None:
+        return "us."  # default to US when we don't know
+
+    # Handle case insensitive region matching
+    region_lower = region.lower()
+    for prefix in REGION_PREFIX_MAP:
+        if region_lower.startswith(f"{prefix}-"):
+            return REGION_PREFIX_MAP[prefix]
+
+    # Fallback to US for anything we don't recognize
+    return "us."
+
+
+def _to_inference_profile_id(model_id: str, region: str = None) -> str:
+    # Return ARNs unchanged
+    if model_id.startswith("arn:"):
+        return model_id
+
+    # Return inference profile IDs that already have regional prefixes
+    if any(model_id.startswith(p) for p in REGION_PREFIX_MAP.values()):
+        return model_id
+
+    # Default to US East when no region is provided
+    if region is None:
+        region = "us-east-1"
+
+    return _get_region_prefix(region) + model_id
+

 class BedrockInferenceAdapter(
    ModelRegistryHelper,
@ -166,8 +203,13 @@ class BedrockInferenceAdapter(
            options["repetition_penalty"] = sampling_params.repetition_penalty

        prompt = await chat_completion_request_to_prompt(request, self.get_llama_model(request.model))
+
+        # Convert foundation model ID to inference profile ID
+        region_name = self.client.meta.region_name
+        inference_profile_id = _to_inference_profile_id(bedrock_model, region_name)
+
        return {
-            "modelId": bedrock_model,
+            "modelId": inference_profile_id,
            "body": json.dumps(
                {
                    "prompt": prompt,
@ -185,6 +227,11 @@ class BedrockInferenceAdapter(
        task_type: EmbeddingTaskType | None = None,
    ) -> EmbeddingsResponse:
        model = await self.model_store.get_model(model_id)
+
+        # Convert foundation model ID to inference profile ID
+        region_name = self.client.meta.region_name
+        inference_profile_id = _to_inference_profile_id(model.provider_resource_id, region_name)
+
        embeddings = []
        for content in contents:
            assert not content_has_media(content), "Bedrock does not support media for embeddings"
@ -193,7 +240,7 @@ class BedrockInferenceAdapter(
            body = json.dumps(input_body)
            response = self.client.invoke_model(
                body=body,
-                modelId=model.provider_resource_id,
+                modelId=inference_profile_id,
                accept="application/json",
                contentType="application/json",
            )