rfc: Add options for supporting various embedding models

Hoist options into the method args directly. Make TextTruncation.none explicit.

make text_truncation optional

rename
Ashwin Bharambe 2025-02-20 16:32:03 -08:00
parent 6f9d622340
commit e011491c6b
3 changed files with 73 additions and 0 deletions


@@ -4944,6 +4944,27 @@
                }
              ],
              "description": "List of contents to generate embeddings for. Each content can be a string or an InterleavedContentItem (and hence can be multimodal). The behavior depends on the model and provider. Some models may only support text."
            },
            "text_truncation": {
              "type": "string",
              "enum": [
                "none",
                "start",
                "end"
              ],
              "description": "(Optional) Config for how to truncate text for embedding when text is longer than the model's max sequence length."
            },
            "output_dimension": {
              "type": "integer",
              "description": "(Optional) Output dimensionality for the embeddings. Only supported by Matryoshka models."
            },
            "task_type": {
              "type": "string",
              "enum": [
                "query",
                "document"
              ],
              "description": "(Optional) How is the embedding being used? This is only supported by asymmetric embedding models."
            }
          },
          "additionalProperties": false,


@@ -3235,6 +3235,28 @@ components:
            List of contents to generate embeddings for. Each content can be a string
            or an InterleavedContentItem (and hence can be multimodal). The behavior
            depends on the model and provider. Some models may only support text.
        text_truncation:
          type: string
          enum:
            - none
            - start
            - end
          description: >-
            (Optional) Config for how to truncate text for embedding when text is
            longer than the model's max sequence length.
        output_dimension:
          type: integer
          description: >-
            (Optional) Output dimensionality for the embeddings. Only supported by
            Matryoshka models.
        task_type:
          type: string
          enum:
            - query
            - document
          description: >-
            (Optional) How is the embedding being used? This is only supported by
            asymmetric embedding models.
      additionalProperties: false
      required:
        - model_id
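
Because the three new fields are optional, a body that omits them should still validate, while one that uses them must respect the enum values. The sketch below checks this with the jsonschema package against a hand-copied, simplified subset of the fragment above; the subset and test bodies are illustrative, not part of the commit.

import jsonschema

# Hand-copied, simplified subset of the request schema above; illustrative only
# (contents is reduced to an array of strings here).
schema_subset = {
    "type": "object",
    "properties": {
        "model_id": {"type": "string"},
        "contents": {"type": "array", "items": {"type": "string"}},
        "text_truncation": {"type": "string", "enum": ["none", "start", "end"]},
        "output_dimension": {"type": "integer"},
        "task_type": {"type": "string", "enum": ["query", "document"]},
    },
    "additionalProperties": False,
    "required": ["model_id"],
}

# Minimal body: omitting the three new optional fields still validates.
jsonschema.validate(
    instance={"model_id": "m", "contents": ["hello"]},
    schema=schema_subset,
)

# Body using the new fields: the enum values must match exactly.
jsonschema.validate(
    instance={
        "model_id": "m",
        "contents": ["hello"],
        "text_truncation": "start",
        "task_type": "document",
    },
    schema=schema_subset,
)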


@@ -402,6 +402,30 @@ class ModelStore(Protocol):
    def get_model(self, identifier: str) -> Model: ...


class TextTruncation(Enum):
    """Config for how to truncate text for embedding when text is longer than the model's max sequence length. Start and End semantics depend on whether the language is left-to-right or right-to-left.

    :cvar none: No truncation (default). If the text is longer than the model's max sequence length, you will get an error.
    :cvar start: Truncate from the start
    :cvar end: Truncate from the end
    """

    none = "none"
    start = "start"
    end = "end"


class EmbeddingTaskType(Enum):
    """How is the embedding being used? This is only supported by asymmetric embedding models.

    :cvar query: Used for a query for semantic search.
    :cvar document: Used at indexing time when ingesting documents.
    """

    query = "query"
    document = "document"


@runtime_checkable
@trace_protocol
class Inference(Protocol):
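
To make the Start/End semantics concrete, here is a rough sketch (not part of the commit) of how a provider might apply TextTruncation to a token sequence before embedding; the helper name, the token-level approach, and max_seq_len are assumptions.

# Illustrative sketch only: how a provider could honor TextTruncation.
def truncate_tokens(tokens: list[int], max_seq_len: int, truncation: TextTruncation) -> list[int]:
    if len(tokens) <= max_seq_len:
        return tokens
    if truncation == TextTruncation.none:
        # Default behavior per the docstring: over-length input is an error.
        raise ValueError(f"Input has {len(tokens)} tokens, max is {max_seq_len}")
    if truncation == TextTruncation.start:
        # Truncate from the start: drop leading tokens, keep the tail.
        return tokens[-max_seq_len:]
    # TextTruncation.end: drop trailing tokens, keep the head.
    return tokens[:max_seq_len]
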
@@ -482,11 +506,17 @@ class Inference(Protocol):
        self,
        model_id: str,
        contents: List[str] | List[InterleavedContentItem],
        text_truncation: Optional[TextTruncation] = TextTruncation.none,
        output_dimension: Optional[int] = None,
        task_type: Optional[EmbeddingTaskType] = None,
    ) -> EmbeddingsResponse:
        """Generate embeddings for content pieces using the specified model.

        :param model_id: The identifier of the model to use. The model must be an embedding model registered with Llama Stack and available via the /models endpoint.
        :param contents: List of contents to generate embeddings for. Each content can be a string or an InterleavedContentItem (and hence can be multimodal). The behavior depends on the model and provider. Some models may only support text.
        :param output_dimension: (Optional) Output dimensionality for the embeddings. Only supported by Matryoshka models.
        :param text_truncation: (Optional) Config for how to truncate text for embedding when text is longer than the model's max sequence length.
        :param task_type: (Optional) How is the embedding being used? This is only supported by asymmetric embedding models.
        :returns: An array of embeddings, one for each content. Each embedding is a list of floats. The dimensionality of the embedding is model-specific; you can check model metadata using /models/{model_id}
        """
        ...
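
With the options hoisted into the method arguments, a caller passes them directly. The sketch below is a hypothetical usage: it assumes the enclosing method is the async embeddings() method of Inference, that client is some concrete implementation, that EmbeddingsResponse exposes the vectors as .embeddings, and that the model ids are placeholders.

# Hypothetical usage; "client" is any object implementing Inference, and the
# model id is a placeholder. EmbeddingsResponse is assumed to expose the
# returned vectors as `.embeddings`.
async def index_and_query(client: Inference) -> None:
    # Index-time embeddings for an asymmetric model: mark them as documents.
    doc_response = await client.embeddings(
        model_id="my-embedding-model",
        contents=["Llama Stack exposes a unified inference API."],
        text_truncation=TextTruncation.end,
        task_type=EmbeddingTaskType.document,
    )

    # Query-time embeddings, optionally shrunk via Matryoshka output_dimension.
    query_response = await client.embeddings(
        model_id="my-embedding-model",
        contents=["What does Llama Stack expose?"],
        output_dimension=256,
        task_type=EmbeddingTaskType.query,
    )
    assert len(doc_response.embeddings) == 1 and len(query_response.embeddings) == 1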