mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-06-29 11:24:19 +00:00
Update the "InterleavedTextMedia" type (#635)
## What does this PR do?

This is a long-pending change and particularly important to get done now.

Specifically:

- We cannot "localize" (aka download) any URLs from media attachments anywhere near our modeling code. It must be done within llama-stack.
- `PIL.Image` is infesting all our APIs via `ImageMedia -> InterleavedTextMedia` and that cannot be right at all. Anything in the API surface must be "naturally serializable". We need a standard `{ type: "image", image_url: "<...>" }` which is more extensible.
- `UserMessage`, `SystemMessage`, etc. are moved completely to llama-stack from the llama-models repository.

See https://github.com/meta-llama/llama-models/pull/244 for the corresponding PR in llama-models.

## Test Plan

```bash
cd llama_stack/providers/tests

pytest -s -v -k "fireworks or ollama or together" inference/test_vision_inference.py
pytest -s -v -k "(fireworks or ollama or together) and llama_3b" inference/test_text_inference.py
pytest -s -v -k chroma memory/test_memory.py \
  --env EMBEDDING_DIMENSION=384 --env CHROMA_DB_PATH=/tmp/foobar

pytest -s -v -k fireworks agents/test_agents.py \
  --safety-shield=meta-llama/Llama-Guard-3-8B \
  --inference-model=meta-llama/Llama-3.1-8B-Instruct
```

Updated the client sdk (see PR ...), installed the SDK in the same environment and then ran the SDK tests:

```bash
cd tests/client-sdk

LLAMA_STACK_CONFIG=together pytest -s -v agents/test_agents.py
LLAMA_STACK_CONFIG=ollama pytest -s -v memory/test_memory.py

# this one needed a bit of hacking in the run.yaml to ensure I could register the vision model correctly
INFERENCE_MODEL=llama3.2-vision:latest LLAMA_STACK_CONFIG=ollama pytest -s -v inference/test_inference.py
```
This commit is contained in:
parent
10eb31badf
commit
8de8eb03c8
66 changed files with 1344 additions and 1801 deletions
60
llama_stack/apis/common/content_types.py
Normal file
60
llama_stack/apis/common/content_types.py
Normal file
|
@ -0,0 +1,60 @@
|
|||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from typing import Annotated, List, Literal, Optional, Union
|
||||
|
||||
from llama_models.schema_utils import json_schema_type, register_schema
|
||||
|
||||
from pydantic import BaseModel, Field, model_validator
|
||||
|
||||
|
||||
# Thin model wrapping a single URI string. The schema handed to
# `json_schema_type` describes it as a plain string in "uri" format whose
# value starts with http://, https://, file:// or data:.
@json_schema_type(
    schema={
        "type": "string",
        "format": "uri",
        "pattern": "^(https?://|file://|data:)",
    }
)
class URL(BaseModel):
    uri: str

    def __str__(self) -> str:
        """Return the wrapped URI so the model prints as a bare string."""
        return self.uri
|
||||
|
||||
|
||||
# Base model for content supplied either by reference (`url`) or inline
# (`data`). Both fields are optional and default to None.
class _URLOrData(BaseModel):
    url: Optional[URL] = None
    data: Optional[bytes] = None

    @model_validator(mode="before")
    @classmethod
    def validator(cls, values):
        """Normalize raw input before field validation runs.

        Dict input is handed back untouched; any other input is treated as
        the value of the `url` field.
        """
        if not isinstance(values, dict):
            return {"url": values}
        return values
|
||||
|
||||
|
||||
# Image content item: inherits the `url` / `data` fields from _URLOrData and
# is tagged with the fixed `type` value "image".
@json_schema_type
class ImageContentItem(_URLOrData):
    type: Literal["image"] = "image"
|
||||
|
||||
|
||||
# Text content item: a required `text` string tagged with the fixed `type`
# value "text".
@json_schema_type
class TextContentItem(BaseModel):
    type: Literal["text"] = "text"
    text: str
|
||||
|
||||
|
||||
# Union of the content item models, discriminated on each item's "type"
# field. Other modalities can be added to the Union here.
InterleavedContentItem = register_schema(
    Annotated[
        Union[ImageContentItem, TextContentItem],
        Field(discriminator="type"),
    ],
    name="InterleavedContentItem",
)
|
||||
|
||||
# Content is a single item or a list of items; a bare "str" is also accepted
# as a special case since it is common.
InterleavedContent = register_schema(
    Union[str, InterleavedContentItem, List[InterleavedContentItem]],
    name="InterleavedContent",
)
|
|
@ -7,12 +7,12 @@
|
|||
from enum import Enum
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
from llama_models.llama3.api.datatypes import URL
|
||||
|
||||
from llama_models.schema_utils import json_schema_type
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from llama_stack.apis.common.content_types import URL
|
||||
|
||||
|
||||
@json_schema_type
|
||||
class RestAPIMethod(Enum):
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
from typing import Literal, Union
|
||||
|
||||
from llama_models.schema_utils import register_schema
|
||||
from pydantic import BaseModel, Field
|
||||
from typing_extensions import Annotated
|
||||
|
||||
|
@ -53,21 +54,24 @@ class AgentTurnInputType(BaseModel):
|
|||
type: Literal["agent_turn_input"] = "agent_turn_input"
|
||||
|
||||
|
||||
ParamType = Annotated[
|
||||
Union[
|
||||
StringType,
|
||||
NumberType,
|
||||
BooleanType,
|
||||
ArrayType,
|
||||
ObjectType,
|
||||
JsonType,
|
||||
UnionType,
|
||||
ChatCompletionInputType,
|
||||
CompletionInputType,
|
||||
AgentTurnInputType,
|
||||
ParamType = register_schema(
|
||||
Annotated[
|
||||
Union[
|
||||
StringType,
|
||||
NumberType,
|
||||
BooleanType,
|
||||
ArrayType,
|
||||
ObjectType,
|
||||
JsonType,
|
||||
UnionType,
|
||||
ChatCompletionInputType,
|
||||
CompletionInputType,
|
||||
AgentTurnInputType,
|
||||
],
|
||||
Field(discriminator="type"),
|
||||
],
|
||||
Field(discriminator="type"),
|
||||
]
|
||||
name="ParamType",
|
||||
)
|
||||
|
||||
# TODO: recursive definition of ParamType in these containers
|
||||
# will cause infinite recursion in OpenAPI generation script
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue