mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-24 01:38:05 +00:00
Merge branch 'main' into add-mcp-streamable-http-support
This commit is contained in:
commit
e027a526c9
81 changed files with 811 additions and 689 deletions
|
|
@ -17,5 +17,5 @@ class BraintrustScoringConfig(BaseModel):
|
|||
@classmethod
|
||||
def sample_run_config(cls, **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"openai_api_key": "${env.OPENAI_API_KEY:+}",
|
||||
"openai_api_key": "${env.OPENAI_API_KEY:=}",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,9 +23,9 @@ class MilvusVectorIOConfig(BaseModel):
|
|||
@classmethod
|
||||
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
|
||||
return {
|
||||
"db_path": f"${{env.MILVUS_DB_PATH:={__distro_dir__}/milvus.db}}",
|
||||
"db_path": "${env.MILVUS_DB_PATH:=" + __distro_dir__ + "}/" + "milvus.db",
|
||||
"kvstore": SqliteKVStoreConfig.sample_run_config(
|
||||
__distro_dir__=__distro_dir__,
|
||||
db_name=f"${{env.MILVUS_KVSTORE_DB_PATH:={__distro_dir__}/milvus_registry.db}}",
|
||||
db_name="milvus_registry.db",
|
||||
),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -520,7 +520,7 @@ Please refer to the inline provider documentation.
|
|||
Api.vector_io,
|
||||
AdapterSpec(
|
||||
adapter_type="milvus",
|
||||
pip_packages=["pymilvus"],
|
||||
pip_packages=["pymilvus[marshmallow<3.13.0]"],
|
||||
module="llama_stack.providers.remote.vector_io.milvus",
|
||||
config_class="llama_stack.providers.remote.vector_io.milvus.MilvusVectorIOConfig",
|
||||
description="""
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ class NvidiaDatasetIOConfig(BaseModel):
|
|||
@classmethod
|
||||
def sample_run_config(cls, **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"api_key": "${env.NVIDIA_API_KEY:+}",
|
||||
"api_key": "${env.NVIDIA_API_KEY:=}",
|
||||
"dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}",
|
||||
"project_id": "${env.NVIDIA_PROJECT_ID:=test-project}",
|
||||
"datasets_url": "${env.NVIDIA_DATASETS_URL:=http://nemo.test}",
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ class NVIDIAConfig(BaseModel):
|
|||
description="A base url for accessing the NVIDIA NIM",
|
||||
)
|
||||
api_key: SecretStr | None = Field(
|
||||
default_factory=lambda: os.getenv("NVIDIA_API_KEY"),
|
||||
default_factory=lambda: SecretStr(os.getenv("NVIDIA_API_KEY")),
|
||||
description="The NVIDIA API key, only needed of using the hosted service",
|
||||
)
|
||||
timeout: int = Field(
|
||||
|
|
@ -53,9 +53,15 @@ class NVIDIAConfig(BaseModel):
|
|||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, **kwargs) -> dict[str, Any]:
|
||||
def sample_run_config(
|
||||
cls,
|
||||
url: str = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}",
|
||||
api_key: str = "${env.NVIDIA_API_KEY:=}",
|
||||
append_api_version: bool = "${env.NVIDIA_APPEND_API_VERSION:=True}",
|
||||
**kwargs,
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"url": "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}",
|
||||
"api_key": "${env.NVIDIA_API_KEY:+}",
|
||||
"append_api_version": "${env.NVIDIA_APPEND_API_VERSION:=True}",
|
||||
"url": url,
|
||||
"api_key": api_key,
|
||||
"append_api_version": append_api_version,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
|
||||
import base64
|
||||
import uuid
|
||||
from collections.abc import AsyncGenerator, AsyncIterator
|
||||
from typing import Any
|
||||
|
|
@ -77,6 +78,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
|
|||
content_has_media,
|
||||
convert_image_content_to_url,
|
||||
interleaved_content_as_str,
|
||||
localize_image_content,
|
||||
request_has_media,
|
||||
)
|
||||
|
||||
|
|
@ -496,6 +498,21 @@ class OllamaInferenceAdapter(
|
|||
user: str | None = None,
|
||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||
model_obj = await self._get_model(model)
|
||||
|
||||
# Ollama does not support image urls, so we need to download the image and convert it to base64
|
||||
async def _convert_message(m: OpenAIMessageParam) -> OpenAIMessageParam:
|
||||
if isinstance(m.content, list):
|
||||
for c in m.content:
|
||||
if c.type == "image_url" and c.image_url and c.image_url.url:
|
||||
localize_result = await localize_image_content(c.image_url.url)
|
||||
if localize_result is None:
|
||||
raise ValueError(f"Failed to localize image content from {c.image_url.url}")
|
||||
|
||||
content, format = localize_result
|
||||
c.image_url.url = f"data:image/{format};base64,{base64.b64encode(content).decode('utf-8')}"
|
||||
return m
|
||||
|
||||
messages = [await _convert_message(m) for m in messages]
|
||||
params = await prepare_openai_completion_params(
|
||||
model=model_obj.provider_resource_id,
|
||||
messages=messages,
|
||||
|
|
|
|||
|
|
@ -25,6 +25,6 @@ class RunpodImplConfig(BaseModel):
|
|||
@classmethod
|
||||
def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
|
||||
return {
|
||||
"url": "${env.RUNPOD_URL:+}",
|
||||
"api_token": "${env.RUNPOD_API_TOKEN:+}",
|
||||
"url": "${env.RUNPOD_URL:=}",
|
||||
"api_token": "${env.RUNPOD_API_TOKEN:=}",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,5 +26,5 @@ class TogetherImplConfig(BaseModel):
|
|||
def sample_run_config(cls, **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"url": "https://api.together.xyz/v1",
|
||||
"api_key": "${env.TOGETHER_API_KEY:+}",
|
||||
"api_key": "${env.TOGETHER_API_KEY:=}",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,6 +41,6 @@ class WatsonXConfig(BaseModel):
|
|||
def sample_run_config(cls, **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}",
|
||||
"api_key": "${env.WATSONX_API_KEY:+}",
|
||||
"project_id": "${env.WATSONX_PROJECT_ID:+}",
|
||||
"api_key": "${env.WATSONX_API_KEY:=}",
|
||||
"project_id": "${env.WATSONX_PROJECT_ID:=}",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ class NvidiaPostTrainingConfig(BaseModel):
|
|||
@classmethod
|
||||
def sample_run_config(cls, **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"api_key": "${env.NVIDIA_API_KEY:+}",
|
||||
"api_key": "${env.NVIDIA_API_KEY:=}",
|
||||
"dataset_namespace": "${env.NVIDIA_DATASET_NAMESPACE:=default}",
|
||||
"project_id": "${env.NVIDIA_PROJECT_ID:=test-project}",
|
||||
"customizer_url": "${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test}",
|
||||
|
|
|
|||
|
|
@ -22,6 +22,6 @@ class BraveSearchToolConfig(BaseModel):
|
|||
@classmethod
|
||||
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
|
||||
return {
|
||||
"api_key": "${env.BRAVE_SEARCH_API_KEY:+}",
|
||||
"api_key": "${env.BRAVE_SEARCH_API_KEY:=}",
|
||||
"max_results": 3,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,6 +22,6 @@ class TavilySearchToolConfig(BaseModel):
|
|||
@classmethod
|
||||
def sample_run_config(cls, __distro_dir__: str) -> dict[str, Any]:
|
||||
return {
|
||||
"api_key": "${env.TAVILY_SEARCH_API_KEY:+}",
|
||||
"api_key": "${env.TAVILY_SEARCH_API_KEY:=}",
|
||||
"max_results": 3,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -17,5 +17,5 @@ class WolframAlphaToolConfig(BaseModel):
|
|||
@classmethod
|
||||
def sample_run_config(cls, __distro_dir__: str, **kwargs: Any) -> dict[str, Any]:
|
||||
return {
|
||||
"api_key": "${env.WOLFRAM_ALPHA_API_KEY:+}",
|
||||
"api_key": "${env.WOLFRAM_ALPHA_API_KEY:=}",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -180,11 +180,10 @@ def request_has_media(request: ChatCompletionRequest | CompletionRequest):
|
|||
return content_has_media(request.content)
|
||||
|
||||
|
||||
async def localize_image_content(media: ImageContentItem) -> tuple[bytes, str]:
|
||||
image = media.image
|
||||
if image.url and image.url.uri.startswith("http"):
|
||||
async def localize_image_content(uri: str) -> tuple[bytes, str] | None:
|
||||
if uri.startswith("http"):
|
||||
async with httpx.AsyncClient() as client:
|
||||
r = await client.get(image.url.uri)
|
||||
r = await client.get(uri)
|
||||
content = r.content
|
||||
content_type = r.headers.get("content-type")
|
||||
if content_type:
|
||||
|
|
@ -194,11 +193,7 @@ async def localize_image_content(media: ImageContentItem) -> tuple[bytes, str]:
|
|||
|
||||
return content, format
|
||||
else:
|
||||
# data is a base64 encoded string, decode it to bytes first
|
||||
# TODO(mf): do this more efficiently, decode less
|
||||
data_bytes = base64.b64decode(image.data)
|
||||
pil_image = PIL_Image.open(io.BytesIO(data_bytes))
|
||||
return data_bytes, pil_image.format
|
||||
return None
|
||||
|
||||
|
||||
async def convert_image_content_to_url(
|
||||
|
|
@ -208,7 +203,18 @@ async def convert_image_content_to_url(
|
|||
if image.url and (not download or image.url.uri.startswith("data")):
|
||||
return image.url.uri
|
||||
|
||||
content, format = await localize_image_content(media)
|
||||
if image.data:
|
||||
# data is a base64 encoded string, decode it to bytes first
|
||||
# TODO(mf): do this more efficiently, decode less
|
||||
content = base64.b64decode(image.data)
|
||||
pil_image = PIL_Image.open(io.BytesIO(content))
|
||||
format = pil_image.format
|
||||
else:
|
||||
localize_result = await localize_image_content(image.url.uri)
|
||||
if localize_result is None:
|
||||
raise ValueError(f"Failed to localize image content from {image.url.uri}")
|
||||
content, format = localize_result
|
||||
|
||||
if include_format:
|
||||
return f"data:image/{format};base64," + base64.b64encode(content).decode("utf-8")
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -9,6 +9,11 @@ import uuid
|
|||
|
||||
|
||||
def generate_chunk_id(document_id: str, chunk_text: str) -> str:
|
||||
"""Generate a unique chunk ID using a hash of document ID and chunk text."""
|
||||
"""
|
||||
Generate a unique chunk ID using a hash of the document ID and chunk text.
|
||||
|
||||
Note: MD5 is used only to calculate an identifier, not for security purposes.
|
||||
Adding usedforsecurity=False for compatibility with FIPS environments.
|
||||
"""
|
||||
hash_input = f"{document_id}:{chunk_text}".encode()
|
||||
return str(uuid.UUID(hashlib.md5(hash_input).hexdigest()))
|
||||
return str(uuid.UUID(hashlib.md5(hash_input, usedforsecurity=False).hexdigest()))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue