mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-16 23:03:49 +00:00
Merge branch 'main' into santiagxf/azure-ai-inference
This commit is contained in:
commit
5c429b0b67
273 changed files with 5491 additions and 5418 deletions
76
llama_stack/providers/utils/bedrock/client.py
Normal file
76
llama_stack/providers/utils/bedrock/client.py
Normal file
|
@ -0,0 +1,76 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
|
||||
import boto3
|
||||
from botocore.client import BaseClient
|
||||
from botocore.config import Config
|
||||
|
||||
from llama_stack.providers.utils.bedrock.config import BedrockBaseConfig
|
||||
from llama_stack.providers.utils.bedrock.refreshable_boto_session import (
|
||||
RefreshableBotoSession,
|
||||
)
|
||||
|
||||
|
||||
def create_bedrock_client(
|
||||
config: BedrockBaseConfig, service_name: str = "bedrock-runtime"
|
||||
) -> BaseClient:
|
||||
"""Creates a boto3 client for Bedrock services with the given configuration.
|
||||
|
||||
Args:
|
||||
config: The Bedrock configuration containing AWS credentials and settings
|
||||
service_name: The AWS service name to create client for (default: "bedrock-runtime")
|
||||
|
||||
Returns:
|
||||
A configured boto3 client
|
||||
"""
|
||||
if config.aws_access_key_id and config.aws_secret_access_key:
|
||||
retries_config = {
|
||||
k: v
|
||||
for k, v in dict(
|
||||
total_max_attempts=config.total_max_attempts,
|
||||
mode=config.retry_mode,
|
||||
).items()
|
||||
if v is not None
|
||||
}
|
||||
|
||||
config_args = {
|
||||
k: v
|
||||
for k, v in dict(
|
||||
region_name=config.region_name,
|
||||
retries=retries_config if retries_config else None,
|
||||
connect_timeout=config.connect_timeout,
|
||||
read_timeout=config.read_timeout,
|
||||
).items()
|
||||
if v is not None
|
||||
}
|
||||
|
||||
boto3_config = Config(**config_args)
|
||||
|
||||
session_args = {
|
||||
"aws_access_key_id": config.aws_access_key_id,
|
||||
"aws_secret_access_key": config.aws_secret_access_key,
|
||||
"aws_session_token": config.aws_session_token,
|
||||
"region_name": config.region_name,
|
||||
"profile_name": config.profile_name,
|
||||
"session_ttl": config.session_ttl,
|
||||
}
|
||||
|
||||
# Remove None values
|
||||
session_args = {k: v for k, v in session_args.items() if v is not None}
|
||||
|
||||
boto3_session = boto3.session.Session(**session_args)
|
||||
return boto3_session.client(service_name, config=boto3_config)
|
||||
else:
|
||||
return (
|
||||
RefreshableBotoSession(
|
||||
region_name=config.region_name,
|
||||
profile_name=config.profile_name,
|
||||
session_ttl=config.session_ttl,
|
||||
)
|
||||
.refreshable_session()
|
||||
.client(service_name)
|
||||
)
|
59
llama_stack/providers/utils/bedrock/config.py
Normal file
59
llama_stack/providers/utils/bedrock/config.py
Normal file
|
@ -0,0 +1,59 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
from typing import Optional
|
||||
|
||||
from llama_models.schema_utils import json_schema_type
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class BedrockBaseConfig(BaseModel):
|
||||
aws_access_key_id: Optional[str] = Field(
|
||||
default=None,
|
||||
description="The AWS access key to use. Default use environment variable: AWS_ACCESS_KEY_ID",
|
||||
)
|
||||
aws_secret_access_key: Optional[str] = Field(
|
||||
default=None,
|
||||
description="The AWS secret access key to use. Default use environment variable: AWS_SECRET_ACCESS_KEY",
|
||||
)
|
||||
aws_session_token: Optional[str] = Field(
|
||||
default=None,
|
||||
description="The AWS session token to use. Default use environment variable: AWS_SESSION_TOKEN",
|
||||
)
|
||||
region_name: Optional[str] = Field(
|
||||
default=None,
|
||||
description="The default AWS Region to use, for example, us-west-1 or us-west-2."
|
||||
"Default use environment variable: AWS_DEFAULT_REGION",
|
||||
)
|
||||
profile_name: Optional[str] = Field(
|
||||
default=None,
|
||||
description="The profile name that contains credentials to use."
|
||||
"Default use environment variable: AWS_PROFILE",
|
||||
)
|
||||
total_max_attempts: Optional[int] = Field(
|
||||
default=None,
|
||||
description="An integer representing the maximum number of attempts that will be made for a single request, "
|
||||
"including the initial attempt. Default use environment variable: AWS_MAX_ATTEMPTS",
|
||||
)
|
||||
retry_mode: Optional[str] = Field(
|
||||
default=None,
|
||||
description="A string representing the type of retries Boto3 will perform."
|
||||
"Default use environment variable: AWS_RETRY_MODE",
|
||||
)
|
||||
connect_timeout: Optional[float] = Field(
|
||||
default=60,
|
||||
description="The time in seconds till a timeout exception is thrown when attempting to make a connection. "
|
||||
"The default is 60 seconds.",
|
||||
)
|
||||
read_timeout: Optional[float] = Field(
|
||||
default=60,
|
||||
description="The time in seconds till a timeout exception is thrown when attempting to read from a connection."
|
||||
"The default is 60 seconds.",
|
||||
)
|
||||
session_ttl: Optional[int] = Field(
|
||||
default=3600,
|
||||
description="The time in seconds till a session expires. The default is 3600 seconds (1 hour).",
|
||||
)
|
116
llama_stack/providers/utils/bedrock/refreshable_boto_session.py
Normal file
116
llama_stack/providers/utils/bedrock/refreshable_boto_session.py
Normal file
|
@ -0,0 +1,116 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import datetime
|
||||
from time import time
|
||||
from uuid import uuid4
|
||||
|
||||
from boto3 import Session
|
||||
from botocore.credentials import RefreshableCredentials
|
||||
from botocore.session import get_session
|
||||
|
||||
|
||||
class RefreshableBotoSession:
|
||||
"""
|
||||
Boto Helper class which lets us create a refreshable session so that we can cache the client or resource.
|
||||
|
||||
Usage
|
||||
-----
|
||||
session = RefreshableBotoSession().refreshable_session()
|
||||
|
||||
client = session.client("s3") # we now can cache this client object without worrying about expiring credentials
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
region_name: str = None,
|
||||
profile_name: str = None,
|
||||
sts_arn: str = None,
|
||||
session_name: str = None,
|
||||
session_ttl: int = 30000,
|
||||
):
|
||||
"""
|
||||
Initialize `RefreshableBotoSession`
|
||||
|
||||
Parameters
|
||||
----------
|
||||
region_name : str (optional)
|
||||
Default region when creating a new connection.
|
||||
|
||||
profile_name : str (optional)
|
||||
The name of a profile to use.
|
||||
|
||||
sts_arn : str (optional)
|
||||
The role arn to sts before creating a session.
|
||||
|
||||
session_name : str (optional)
|
||||
An identifier for the assumed role session. (required when `sts_arn` is given)
|
||||
|
||||
session_ttl : int (optional)
|
||||
An integer number to set the TTL for each session. Beyond this session, it will renew the token.
|
||||
50 minutes by default which is before the default role expiration of 1 hour
|
||||
"""
|
||||
|
||||
self.region_name = region_name
|
||||
self.profile_name = profile_name
|
||||
self.sts_arn = sts_arn
|
||||
self.session_name = session_name or uuid4().hex
|
||||
self.session_ttl = session_ttl
|
||||
|
||||
def __get_session_credentials(self):
|
||||
"""
|
||||
Get session credentials
|
||||
"""
|
||||
session = Session(region_name=self.region_name, profile_name=self.profile_name)
|
||||
|
||||
# if sts_arn is given, get credential by assuming the given role
|
||||
if self.sts_arn:
|
||||
sts_client = session.client(
|
||||
service_name="sts", region_name=self.region_name
|
||||
)
|
||||
response = sts_client.assume_role(
|
||||
RoleArn=self.sts_arn,
|
||||
RoleSessionName=self.session_name,
|
||||
DurationSeconds=self.session_ttl,
|
||||
).get("Credentials")
|
||||
|
||||
credentials = {
|
||||
"access_key": response.get("AccessKeyId"),
|
||||
"secret_key": response.get("SecretAccessKey"),
|
||||
"token": response.get("SessionToken"),
|
||||
"expiry_time": response.get("Expiration").isoformat(),
|
||||
}
|
||||
else:
|
||||
session_credentials = session.get_credentials().get_frozen_credentials()
|
||||
credentials = {
|
||||
"access_key": session_credentials.access_key,
|
||||
"secret_key": session_credentials.secret_key,
|
||||
"token": session_credentials.token,
|
||||
"expiry_time": datetime.datetime.fromtimestamp(
|
||||
time() + self.session_ttl, datetime.timezone.utc
|
||||
).isoformat(),
|
||||
}
|
||||
|
||||
return credentials
|
||||
|
||||
def refreshable_session(self) -> Session:
|
||||
"""
|
||||
Get refreshable boto3 session.
|
||||
"""
|
||||
# Get refreshable credentials
|
||||
refreshable_credentials = RefreshableCredentials.create_from_metadata(
|
||||
metadata=self.__get_session_credentials(),
|
||||
refresh_using=self.__get_session_credentials,
|
||||
method="sts-assume-role",
|
||||
)
|
||||
|
||||
# attach refreshable credentials current session
|
||||
session = get_session()
|
||||
session._credentials = refreshable_credentials
|
||||
session.set_config_variable("region", self.region_name)
|
||||
autorefresh_session = Session(botocore_session=session)
|
||||
|
||||
return autorefresh_session
|
|
@ -49,6 +49,9 @@ def text_from_choice(choice) -> str:
|
|||
if hasattr(choice, "message"):
|
||||
return choice.message.content
|
||||
|
||||
if hasattr(choice, "message"):
|
||||
return choice.message.content
|
||||
|
||||
return choice.text
|
||||
|
||||
|
||||
|
@ -102,7 +105,6 @@ def process_chat_completion_response(
|
|||
async def process_completion_stream_response(
|
||||
stream: AsyncGenerator[OpenAICompatCompletionResponse, None], formatter: ChatFormat
|
||||
) -> AsyncGenerator:
|
||||
|
||||
stop_reason = None
|
||||
|
||||
async for chunk in stream:
|
||||
|
@ -162,6 +164,7 @@ async def process_chat_completion_stream_response(
|
|||
|
||||
text = text_from_choice(choice)
|
||||
if not text:
|
||||
# Sometimes you get empty chunks from providers
|
||||
continue
|
||||
|
||||
# check if its a tool call ( aka starts with <|python_tag|> )
|
||||
|
|
|
@ -3,10 +3,16 @@
|
|||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
from typing import Tuple
|
||||
|
||||
import httpx
|
||||
|
||||
from llama_models.llama3.api.chat_format import ChatFormat
|
||||
from PIL import Image as PIL_Image
|
||||
from termcolor import cprint
|
||||
|
||||
from llama_models.llama3.api.datatypes import * # noqa: F403
|
||||
|
@ -24,6 +30,92 @@ from llama_models.sku_list import resolve_model
|
|||
from llama_stack.providers.utils.inference import supported_inference_models
|
||||
|
||||
|
||||
def content_has_media(content: InterleavedTextMedia):
|
||||
def _has_media_content(c):
|
||||
return isinstance(c, ImageMedia)
|
||||
|
||||
if isinstance(content, list):
|
||||
return any(_has_media_content(c) for c in content)
|
||||
else:
|
||||
return _has_media_content(content)
|
||||
|
||||
|
||||
def messages_have_media(messages: List[Message]):
|
||||
return any(content_has_media(m.content) for m in messages)
|
||||
|
||||
|
||||
def request_has_media(request: Union[ChatCompletionRequest, CompletionRequest]):
|
||||
if isinstance(request, ChatCompletionRequest):
|
||||
return messages_have_media(request.messages)
|
||||
else:
|
||||
return content_has_media(request.content)
|
||||
|
||||
|
||||
async def convert_image_media_to_url(
|
||||
media: ImageMedia, download: bool = False, include_format: bool = True
|
||||
) -> str:
|
||||
if isinstance(media.image, PIL_Image.Image):
|
||||
if media.image.format == "PNG":
|
||||
format = "png"
|
||||
elif media.image.format == "GIF":
|
||||
format = "gif"
|
||||
elif media.image.format == "JPEG":
|
||||
format = "jpeg"
|
||||
else:
|
||||
raise ValueError(f"Unsupported image format {media.image.format}")
|
||||
|
||||
bytestream = io.BytesIO()
|
||||
media.image.save(bytestream, format=media.image.format)
|
||||
bytestream.seek(0)
|
||||
content = bytestream.getvalue()
|
||||
else:
|
||||
if not download:
|
||||
return media.image.uri
|
||||
else:
|
||||
assert isinstance(media.image, URL)
|
||||
async with httpx.AsyncClient() as client:
|
||||
r = await client.get(media.image.uri)
|
||||
content = r.content
|
||||
content_type = r.headers.get("content-type")
|
||||
if content_type:
|
||||
format = content_type.split("/")[-1]
|
||||
else:
|
||||
format = "png"
|
||||
|
||||
if include_format:
|
||||
return f"data:image/{format};base64," + base64.b64encode(content).decode(
|
||||
"utf-8"
|
||||
)
|
||||
else:
|
||||
return base64.b64encode(content).decode("utf-8")
|
||||
|
||||
|
||||
# TODO: name this function better! this is about OpenAI compatibile image
|
||||
# media conversion of the message. this should probably go in openai_compat.py
|
||||
async def convert_message_to_dict(message: Message, download: bool = False) -> dict:
|
||||
async def _convert_content(content) -> dict:
|
||||
if isinstance(content, ImageMedia):
|
||||
return {
|
||||
"type": "image_url",
|
||||
"image_url": {
|
||||
"url": await convert_image_media_to_url(content, download=download),
|
||||
},
|
||||
}
|
||||
else:
|
||||
assert isinstance(content, str)
|
||||
return {"type": "text", "text": content}
|
||||
|
||||
if isinstance(message.content, list):
|
||||
content = [await _convert_content(c) for c in message.content]
|
||||
else:
|
||||
content = [await _convert_content(message.content)]
|
||||
|
||||
return {
|
||||
"role": message.role,
|
||||
"content": content,
|
||||
}
|
||||
|
||||
|
||||
def completion_request_to_prompt(
|
||||
request: CompletionRequest, formatter: ChatFormat
|
||||
) -> str:
|
||||
|
|
|
@ -4,10 +4,11 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import re
|
||||
from enum import Enum
|
||||
from typing import Literal, Optional, Union
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from pydantic import BaseModel, Field, field_validator
|
||||
from typing_extensions import Annotated
|
||||
|
||||
from llama_stack.distribution.utils.config_dirs import RUNTIME_BASE_DIR
|
||||
|
@ -51,6 +52,23 @@ class PostgresKVStoreConfig(CommonConfig):
|
|||
db: str = "llamastack"
|
||||
user: str
|
||||
password: Optional[str] = None
|
||||
table_name: str = "llamastack_kvstore"
|
||||
|
||||
@classmethod
|
||||
@field_validator("table_name")
|
||||
def validate_table_name(cls, v: str) -> str:
|
||||
# PostgreSQL identifiers rules:
|
||||
# - Must start with a letter or underscore
|
||||
# - Can contain letters, numbers, and underscores
|
||||
# - Maximum length is 63 bytes
|
||||
pattern = r"^[a-zA-Z_][a-zA-Z0-9_]*$"
|
||||
if not re.match(pattern, v):
|
||||
raise ValueError(
|
||||
"Invalid table name. Must start with letter or underscore and contain only letters, numbers, and underscores"
|
||||
)
|
||||
if len(v) > 63:
|
||||
raise ValueError("Table name must be less than 63 characters")
|
||||
return v
|
||||
|
||||
|
||||
KVStoreConfig = Annotated[
|
||||
|
|
|
@ -43,7 +43,9 @@ async def kvstore_impl(config: KVStoreConfig) -> KVStore:
|
|||
|
||||
impl = SqliteKVStoreImpl(config)
|
||||
elif config.type == KVStoreType.postgres.value:
|
||||
raise NotImplementedError()
|
||||
from .postgres import PostgresKVStoreImpl
|
||||
|
||||
impl = PostgresKVStoreImpl(config)
|
||||
else:
|
||||
raise ValueError(f"Unknown kvstore type {config.type}")
|
||||
|
||||
|
|
7
llama_stack/providers/utils/kvstore/postgres/__init__.py
Normal file
7
llama_stack/providers/utils/kvstore/postgres/__init__.py
Normal file
|
@ -0,0 +1,7 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from .postgres import PostgresKVStoreImpl # noqa: F401 F403
|
103
llama_stack/providers/utils/kvstore/postgres/postgres.py
Normal file
103
llama_stack/providers/utils/kvstore/postgres/postgres.py
Normal file
|
@ -0,0 +1,103 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from datetime import datetime
|
||||
from typing import List, Optional
|
||||
|
||||
import psycopg2
|
||||
from psycopg2.extras import DictCursor
|
||||
|
||||
from ..api import KVStore
|
||||
from ..config import PostgresKVStoreConfig
|
||||
|
||||
|
||||
class PostgresKVStoreImpl(KVStore):
|
||||
def __init__(self, config: PostgresKVStoreConfig):
|
||||
self.config = config
|
||||
self.conn = None
|
||||
self.cursor = None
|
||||
|
||||
async def initialize(self) -> None:
|
||||
try:
|
||||
self.conn = psycopg2.connect(
|
||||
host=self.config.host,
|
||||
port=self.config.port,
|
||||
database=self.config.db,
|
||||
user=self.config.user,
|
||||
password=self.config.password,
|
||||
)
|
||||
self.conn.autocommit = True
|
||||
self.cursor = self.conn.cursor(cursor_factory=DictCursor)
|
||||
|
||||
# Create table if it doesn't exist
|
||||
self.cursor.execute(
|
||||
f"""
|
||||
CREATE TABLE IF NOT EXISTS {self.config.table_name} (
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT,
|
||||
expiration TIMESTAMP
|
||||
)
|
||||
"""
|
||||
)
|
||||
except Exception as e:
|
||||
import traceback
|
||||
|
||||
traceback.print_exc()
|
||||
raise RuntimeError("Could not connect to PostgreSQL database server") from e
|
||||
|
||||
def _namespaced_key(self, key: str) -> str:
|
||||
if not self.config.namespace:
|
||||
return key
|
||||
return f"{self.config.namespace}:{key}"
|
||||
|
||||
async def set(
|
||||
self, key: str, value: str, expiration: Optional[datetime] = None
|
||||
) -> None:
|
||||
key = self._namespaced_key(key)
|
||||
self.cursor.execute(
|
||||
f"""
|
||||
INSERT INTO {self.config.table_name} (key, value, expiration)
|
||||
VALUES (%s, %s, %s)
|
||||
ON CONFLICT (key) DO UPDATE
|
||||
SET value = EXCLUDED.value, expiration = EXCLUDED.expiration
|
||||
""",
|
||||
(key, value, expiration),
|
||||
)
|
||||
|
||||
async def get(self, key: str) -> Optional[str]:
|
||||
key = self._namespaced_key(key)
|
||||
self.cursor.execute(
|
||||
f"""
|
||||
SELECT value FROM {self.config.table_name}
|
||||
WHERE key = %s
|
||||
AND (expiration IS NULL OR expiration > NOW())
|
||||
""",
|
||||
(key,),
|
||||
)
|
||||
result = self.cursor.fetchone()
|
||||
return result[0] if result else None
|
||||
|
||||
async def delete(self, key: str) -> None:
|
||||
key = self._namespaced_key(key)
|
||||
self.cursor.execute(
|
||||
f"DELETE FROM {self.config.table_name} WHERE key = %s",
|
||||
(key,),
|
||||
)
|
||||
|
||||
async def range(self, start_key: str, end_key: str) -> List[str]:
|
||||
start_key = self._namespaced_key(start_key)
|
||||
end_key = self._namespaced_key(end_key)
|
||||
|
||||
self.cursor.execute(
|
||||
f"""
|
||||
SELECT value FROM {self.config.table_name}
|
||||
WHERE key >= %s AND key < %s
|
||||
AND (expiration IS NULL OR expiration > NOW())
|
||||
ORDER BY key
|
||||
""",
|
||||
(start_key, end_key),
|
||||
)
|
||||
return [row[0] for row in self.cursor.fetchall()]
|
Loading…
Add table
Add a link
Reference in a new issue