Merge branch 'main' into santiagxf/azure-ai-inference

2025-10-16 23:03:49 +00:00 · 2024-11-07 12:43:55 -05:00 · 2024-11-07 12:43:55 -05:00 · 5c429b0b67
commit 5c429b0b67
parent e247849d1b cfcc0a871c
273 changed files with 5491 additions and 5418 deletions
--- a/llama_stack/providers/utils/bedrock/client.py
+++ b/llama_stack/providers/utils/bedrock/client.py
@ -0,0 +1,76 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+
+import boto3
+from botocore.client import BaseClient
+from botocore.config import Config
+
+from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig
+from llama_stack.providers.utils.bedrock.refreshable_boto_session import (
+    RefreshableBotoSession,
+)
+
+
+def create_bedrock_client(
+    config: BedrockBaseConfig, service_name: str = "bedrock-runtime"
+) -> BaseClient:
+    """Creates a boto3 client for Bedrock services with the given configuration.
+
+    Args:
+        config: The Bedrock configuration containing AWS credentials and settings
+        service_name: The AWS service name to create client for (default: "bedrock-runtime")
+
+    Returns:
+        A configured boto3 client
+    """
+    if config.aws_access_key_id and config.aws_secret_access_key:
+        retries_config = {
+            k: v
+            for k, v in dict(
+                total_max_attempts=config.total_max_attempts,
+                mode=config.retry_mode,
+            ).items()
+            if v is not None
+        }
+
+        config_args = {
+            k: v
+            for k, v in dict(
+                region_name=config.region_name,
+                retries=retries_config if retries_config else None,
+                connect_timeout=config.connect_timeout,
+                read_timeout=config.read_timeout,
+            ).items()
+            if v is not None
+        }
+
+        boto3_config = Config(**config_args)
+
+        session_args = {
+            "aws_access_key_id": config.aws_access_key_id,
+            "aws_secret_access_key": config.aws_secret_access_key,
+            "aws_session_token": config.aws_session_token,
+            "region_name": config.region_name,
+            "profile_name": config.profile_name,
+            "session_ttl": config.session_ttl,
+        }
+
+        # Remove None values
+        session_args = {k: v for k, v in session_args.items() if v is not None}
+
+        boto3_session = boto3.session.Session(**session_args)
+        return boto3_session.client(service_name, config=boto3_config)
+    else:
+        return (
+            RefreshableBotoSession(
+                region_name=config.region_name,
+                profile_name=config.profile_name,
+                session_ttl=config.session_ttl,
+            )
+            .refreshable_session()
+            .client(service_name)
+        )
--- a/llama_stack/providers/utils/bedrock/config.py
+++ b/llama_stack/providers/utils/bedrock/config.py
@ -0,0 +1,59 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+from typing import Optional
+
+from llama_models.schema_utils import json_schema_type
+from pydantic import BaseModel, Field
+
+
+@json_schema_type
+class BedrockBaseConfig(BaseModel):
+    aws_access_key_id: Optional[str] = Field(
+        default=None,
+        description="The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID",
+    )
+    aws_secret_access_key: Optional[str] = Field(
+        default=None,
+        description="The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY",
+    )
+    aws_session_token: Optional[str] = Field(
+        default=None,
+        description="The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN",
+    )
+    region_name: Optional[str] = Field(
+        default=None,
+        description="The default AWS Region to use, for example, us-west-1 or us-west-2."
+        "Default use environment variable: AWS_DEFAULT_REGION",
+    )
+    profile_name: Optional[str] = Field(
+        default=None,
+        description="The profile name that contains credentials to use."
+        "Default use environment variable: AWS_PROFILE",
+    )
+    total_max_attempts: Optional[int] = Field(
+        default=None,
+        description="An integer representing the maximum number of attempts that will be made for a single request, "
+        "including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS",
+    )
+    retry_mode: Optional[str] = Field(
+        default=None,
+        description="A string representing the type of retries Boto3 will perform."
+        "Default use environment variable: AWS_RETRY_MODE",
+    )
+    connect_timeout: Optional[float] = Field(
+        default=60,
+        description="The time in seconds till a timeout exception is thrown when attempting to make a connection. "
+        "The default is 60 seconds.",
+    )
+    read_timeout: Optional[float] = Field(
+        default=60,
+        description="The time in seconds till a timeout exception is thrown when attempting to read from a connection."
+        "The default is 60 seconds.",
+    )
+    session_ttl: Optional[int] = Field(
+        default=3600,
+        description="The time in seconds till a session expires. The default is 3600 seconds (1 hour).",
+    )
--- a/llama_stack/providers/utils/bedrock/refreshable_boto_session.py
+++ b/llama_stack/providers/utils/bedrock/refreshable_boto_session.py
@ -0,0 +1,116 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+import datetime
+from time import time
+from uuid import uuid4
+
+from boto3 import Session
+from botocore.credentials import RefreshableCredentials
+from botocore.session import get_session
+
+
+class RefreshableBotoSession:
+    """
+    Boto Helper class which lets us create a refreshable session so that we can cache the client or resource.
+
+    Usage
+    -----
+    session = RefreshableBotoSession().refreshable_session()
+
+    client = session.client("s3") # we now can cache this client object without worrying about expiring credentials
+    """
+
+    def __init__(
+        self,
+        region_name: str = None,
+        profile_name: str = None,
+        sts_arn: str = None,
+        session_name: str = None,
+        session_ttl: int = 30000,
+    ):
+        """
+        Initialize `RefreshableBotoSession`
+
+        Parameters
+        ----------
+        region_name : str (optional)
+            Default region when creating a new connection.
+
+        profile_name : str (optional)
+            The name of a profile to use.
+
+        sts_arn : str (optional)
+            The role arn to sts before creating a session.
+
+        session_name : str (optional)
+            An identifier for the assumed role session. (required when `sts_arn` is given)
+
+        session_ttl : int (optional)
+            An integer number to set the TTL for each session. Beyond this session, it will renew the token.
+            50 minutes by default which is before the default role expiration of 1 hour
+        """
+
+        self.region_name = region_name
+        self.profile_name = profile_name
+        self.sts_arn = sts_arn
+        self.session_name = session_name or uuid4().hex
+        self.session_ttl = session_ttl
+
+    def __get_session_credentials(self):
+        """
+        Get session credentials
+        """
+        session = Session(region_name=self.region_name, profile_name=self.profile_name)
+
+        # if sts_arn is given, get credential by assuming the given role
+        if self.sts_arn:
+            sts_client = session.client(
+                service_name="sts", region_name=self.region_name
+            )
+            response = sts_client.assume_role(
+                RoleArn=self.sts_arn,
+                RoleSessionName=self.session_name,
+                DurationSeconds=self.session_ttl,
+            ).get("Credentials")
+
+            credentials = {
+                "access_key": response.get("AccessKeyId"),
+                "secret_key": response.get("SecretAccessKey"),
+                "token": response.get("SessionToken"),
+                "expiry_time": response.get("Expiration").isoformat(),
+            }
+        else:
+            session_credentials = session.get_credentials().get_frozen_credentials()
+            credentials = {
+                "access_key": session_credentials.access_key,
+                "secret_key": session_credentials.secret_key,
+                "token": session_credentials.token,
+                "expiry_time": datetime.datetime.fromtimestamp(
+                    time() + self.session_ttl, datetime.timezone.utc
+                ).isoformat(),
+            }
+
+        return credentials
+
+    def refreshable_session(self) -> Session:
+        """
+        Get refreshable boto3 session.
+        """
+        # Get refreshable credentials
+        refreshable_credentials = RefreshableCredentials.create_from_metadata(
+            metadata=self.__get_session_credentials(),
+            refresh_using=self.__get_session_credentials,
+            method="sts-assume-role",
+        )
+
+        # attach refreshable credentials current session
+        session = get_session()
+        session._credentials = refreshable_credentials
+        session.set_config_variable("region", self.region_name)
+        autorefresh_session = Session(botocore_session=session)
+
+        return autorefresh_session
--- a/llama_stack/providers/utils/inference/openai_compat.py
+++ b/llama_stack/providers/utils/inference/openai_compat.py
@ -49,6 +49,9 @@ def text_from_choice(choice) -> str:
    if hasattr(choice, "message"):
        return choice.message.content

+    if hasattr(choice, "message"):
+        return choice.message.content
+
    return choice.text


@ -102,7 +105,6 @@ def process_chat_completion_response(
 async def process_completion_stream_response(
    stream: AsyncGenerator[OpenAICompatCompletionResponse, None], formatter: ChatFormat
 ) -> AsyncGenerator:
-
    stop_reason = None

    async for chunk in stream:
@ -162,6 +164,7 @@ async def process_chat_completion_stream_response(

        text = text_from_choice(choice)
        if not text:
+            # Sometimes you get empty chunks from providers
            continue

        # check if its a tool call ( aka starts with <|python_tag|> )
--- a/llama_stack/providers/utils/inference/prompt_adapter.py
+++ b/llama_stack/providers/utils/inference/prompt_adapter.py
@ -3,10 +3,16 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+
+import base64
+import io
 import json
 from typing import Tuple

+import httpx
+
 from llama_models.llama3.api.chat_format import ChatFormat
+from PIL import Image as PIL_Image
 from termcolor import cprint

 from llama_models.llama3.api.datatypes import *  # noqa: F403
@ -24,6 +30,92 @@ from llama_models.sku_list import resolve_model
 from llama_stack.providers.utils.inference import supported_inference_models


+def content_has_media(content: InterleavedTextMedia):
+    def _has_media_content(c):
+        return isinstance(c, ImageMedia)
+
+    if isinstance(content, list):
+        return any(_has_media_content(c) for c in content)
+    else:
+        return _has_media_content(content)
+
+
+def messages_have_media(messages: List[Message]):
+    return any(content_has_media(m.content) for m in messages)
+
+
+def request_has_media(request: Union[ChatCompletionRequest, CompletionRequest]):
+    if isinstance(request, ChatCompletionRequest):
+        return messages_have_media(request.messages)
+    else:
+        return content_has_media(request.content)
+
+
+async def convert_image_media_to_url(
+    media: ImageMedia, download: bool = False, include_format: bool = True
+) -> str:
+    if isinstance(media.image, PIL_Image.Image):
+        if media.image.format == "PNG":
+            format = "png"
+        elif media.image.format == "GIF":
+            format = "gif"
+        elif media.image.format == "JPEG":
+            format = "jpeg"
+        else:
+            raise ValueError(f"Unsupported image format {media.image.format}")
+
+        bytestream = io.BytesIO()
+        media.image.save(bytestream, format=media.image.format)
+        bytestream.seek(0)
+        content = bytestream.getvalue()
+    else:
+        if not download:
+            return media.image.uri
+        else:
+            assert isinstance(media.image, URL)
+            async with httpx.AsyncClient() as client:
+                r = await client.get(media.image.uri)
+                content = r.content
+                content_type = r.headers.get("content-type")
+                if content_type:
+                    format = content_type.split("/")[-1]
+                else:
+                    format = "png"
+
+    if include_format:
+        return f"data:image/{format};base64," + base64.b64encode(content).decode(
+            "utf-8"
+        )
+    else:
+        return base64.b64encode(content).decode("utf-8")
+
+
+# TODO: name this function better! this is about OpenAI compatibile image
+# media conversion of the message. this should probably go in openai_compat.py
+async def convert_message_to_dict(message: Message, download: bool = False) -> dict:
+    async def _convert_content(content) -> dict:
+        if isinstance(content, ImageMedia):
+            return {
+                "type": "image_url",
+                "image_url": {
+                    "url": await convert_image_media_to_url(content, download=download),
+                },
+            }
+        else:
+            assert isinstance(content, str)
+            return {"type": "text", "text": content}
+
+    if isinstance(message.content, list):
+        content = [await _convert_content(c) for c in message.content]
+    else:
+        content = [await _convert_content(message.content)]
+
+    return {
+        "role": message.role,
+        "content": content,
+    }
+
+
 def completion_request_to_prompt(
    request: CompletionRequest, formatter: ChatFormat
 ) -> str:
--- a/llama_stack/providers/utils/kvstore/config.py
+++ b/llama_stack/providers/utils/kvstore/config.py
@ -4,10 +4,11 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import re
 from enum import Enum
 from typing import Literal, Optional, Union

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, field_validator
 from typing_extensions import Annotated

 from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR
@ -51,6 +52,23 @@ class PostgresKVStoreConfig(CommonConfig):
    db: str = "llamastack"
    user: str
    password: Optional[str] = None
+    table_name: str = "llamastack_kvstore"
+
+    @classmethod
+    @field_validator("table_name")
+    def validate_table_name(cls, v: str) -> str:
+        # PostgreSQL identifiers rules:
+        # - Must start with a letter or underscore
+        # - Can contain letters, numbers, and underscores
+        # - Maximum length is 63 bytes
+        pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*$"
+        if not re.match(pattern, v):
+            raise ValueError(
+                "Invalid table name. Must start with letter or underscore and contain only letters, numbers, and underscores"
+            )
+        if len(v) > 63:
+            raise ValueError("Table name must be less than 63 characters")
+        return v


 KVStoreConfig = Annotated[
--- a/llama_stack/providers/utils/kvstore/kvstore.py
+++ b/llama_stack/providers/utils/kvstore/kvstore.py
@ -43,7 +43,9 @@ async def kvstore_impl(config: KVStoreConfig) -> KVStore:

        impl = SqliteKVStoreImpl(config)
    elif config.type == KVStoreType.postgres.value:
-        raise NotImplementedError()
+        from .postgres import PostgresKVStoreImpl
+
+        impl = PostgresKVStoreImpl(config)
    else:
        raise ValueError(f"Unknown kvstore type {config.type}")

--- a/llama_stack/providers/utils/kvstore/postgres/init.py
+++ b/llama_stack/providers/utils/kvstore/postgres/init.py
@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from .postgres import PostgresKVStoreImpl  # noqa: F401 F403
--- a/llama_stack/providers/utils/kvstore/postgres/postgres.py
+++ b/llama_stack/providers/utils/kvstore/postgres/postgres.py
@ -0,0 +1,103 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the terms described in the LICENSE file in
+# the root directory of this source tree.
+
+from datetime import datetime
+from typing import List, Optional
+
+import psycopg2
+from psycopg2.extras import DictCursor
+
+from ..api import KVStore
+from ..config import PostgresKVStoreConfig
+
+
+class PostgresKVStoreImpl(KVStore):
+    def __init__(self, config: PostgresKVStoreConfig):
+        self.config = config
+        self.conn = None
+        self.cursor = None
+
+    async def initialize(self) -> None:
+        try:
+            self.conn = psycopg2.connect(
+                host=self.config.host,
+                port=self.config.port,
+                database=self.config.db,
+                user=self.config.user,
+                password=self.config.password,
+            )
+            self.conn.autocommit = True
+            self.cursor = self.conn.cursor(cursor_factory=DictCursor)
+
+            # Create table if it doesn't exist
+            self.cursor.execute(
+                f"""
+                CREATE TABLE IF NOT EXISTS {self.config.table_name} (
+                    key TEXT PRIMARY KEY,
+                    value TEXT,
+                    expiration TIMESTAMP
+                )
+                """
+            )
+        except Exception as e:
+            import traceback
+
+            traceback.print_exc()
+            raise RuntimeError("Could not connect to PostgreSQL database server") from e
+
+    def _namespaced_key(self, key: str) -> str:
+        if not self.config.namespace:
+            return key
+        return f"{self.config.namespace}:{key}"
+
+    async def set(
+        self, key: str, value: str, expiration: Optional[datetime] = None
+    ) -> None:
+        key = self._namespaced_key(key)
+        self.cursor.execute(
+            f"""
+            INSERT INTO {self.config.table_name} (key, value, expiration)
+            VALUES (%s, %s, %s)
+            ON CONFLICT (key) DO UPDATE
+            SET value = EXCLUDED.value, expiration = EXCLUDED.expiration
+            """,
+            (key, value, expiration),
+        )
+
+    async def get(self, key: str) -> Optional[str]:
+        key = self._namespaced_key(key)
+        self.cursor.execute(
+            f"""
+            SELECT value FROM {self.config.table_name}
+            WHERE key = %s
+            AND (expiration IS NULL OR expiration > NOW())
+            """,
+            (key,),
+        )
+        result = self.cursor.fetchone()
+        return result[0] if result else None
+
+    async def delete(self, key: str) -> None:
+        key = self._namespaced_key(key)
+        self.cursor.execute(
+            f"DELETE FROM {self.config.table_name} WHERE key = %s",
+            (key,),
+        )
+
+    async def range(self, start_key: str, end_key: str) -> List[str]:
+        start_key = self._namespaced_key(start_key)
+        end_key = self._namespaced_key(end_key)
+
+        self.cursor.execute(
+            f"""
+            SELECT value FROM {self.config.table_name}
+            WHERE key >= %s AND key < %s
+            AND (expiration IS NULL OR expiration > NOW())
+            ORDER BY key
+            """,
+            (start_key, end_key),
+        )
+        return [row[0] for row in self.cursor.fetchall()]