Update the "InterleavedTextMedia" type (#635)
## What does this PR do?

This is a long-pending change and particularly important to get done now.

Specifically:

- we cannot "localize" (aka download) any URLs from media attachments anywhere near our modeling code. it must be done within llama-stack.
- `PIL.Image` is infesting all our APIs via `ImageMedia -> InterleavedTextMedia` and that cannot be right at all. Anything in the API surface must be "naturally serializable". We need a standard `{ type: "image", image_url: "<...>" }` which is more extensible.
- `UserMessage`, `SystemMessage`, etc. are moved completely to llama-stack from the llama-models repository. See https://github.com/meta-llama/llama-models/pull/244 for the corresponding PR in llama-models.

## Test Plan

```bash
cd llama_stack/providers/tests

pytest -s -v -k "fireworks or ollama or together" inference/test_vision_inference.py

pytest -s -v -k "(fireworks or ollama or together) and llama_3b" inference/test_text_inference.py

pytest -s -v -k chroma memory/test_memory.py \
  --env EMBEDDING_DIMENSION=384 --env CHROMA_DB_PATH=/tmp/foobar

pytest -s -v -k fireworks agents/test_agents.py \
  --safety-shield=meta-llama/Llama-Guard-3-8B \
  --inference-model=meta-llama/Llama-3.1-8B-Instruct
```

Updated the client sdk (see PR ...), installed the SDK in the same environment and then ran the SDK tests:

```bash
cd tests/client-sdk

LLAMA_STACK_CONFIG=together pytest -s -v agents/test_agents.py

LLAMA_STACK_CONFIG=ollama pytest -s -v memory/test_memory.py

# this one needed a bit of hacking in the run.yaml to ensure I could
# register the vision model correctly
INFERENCE_MODEL=llama3.2-vision:latest LLAMA_STACK_CONFIG=ollama pytest -s -v inference/test_inference.py
```
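For reference, a rough sketch of what a mixed text-and-image user message looks like under the new serializable content scheme. The `{ type: "image", image_url: "<...>" }` shape is taken from the description above; the text item and the exact field layout are illustrative assumptions, not the final schema.

```python
# Illustrative only: the image item shape comes from the PR description above;
# the text item and the overall field layout are assumptions, not the final schema.
user_message = {
    "role": "user",
    "content": [
        {"type": "image", "image_url": "https://example.com/dog.png"},
        {"type": "text", "text": "What breed is this?"},
    ],
}
```

In the diff below these items surface as `ImageContentItem` / `TextContentItem`, and `InterleavedContent` replaces `InterleavedTextMedia` throughout the provider adapters.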
This commit is contained in:
parent
10eb31badf
commit
8de8eb03c8
66 changed files with 1344 additions and 1801 deletions
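The hunks below apply the same mechanical renames in every provider adapter: `InterleavedTextMedia` becomes `InterleavedContent`, `interleaved_text_media_as_str` becomes `interleaved_content_as_str`, and `convert_message_to_dict` becomes `convert_message_to_openai_dict`. As a minimal sketch (helper names taken from the diff; the wrapper function itself is hypothetical), the shared embeddings pre-processing step looks roughly like this:

```python
from typing import List

# Helper names below are taken from the diff; the wrapper function is hypothetical.
from llama_stack.providers.utils.inference.prompt_adapter import (
    content_has_media,
    interleaved_content_as_str,
)


def flatten_for_embeddings(contents: List) -> List[str]:
    """Sketch of the pre-processing each adapter's embeddings() performs after this change."""
    texts = []
    for content in contents:
        # Every provider touched here rejects media for embeddings.
        assert not content_has_media(content), "media is not supported for embeddings"
        # Interleaved content is flattened to a plain string before the provider call.
        texts.append(interleaved_content_as_str(content))
    return texts
```

Each adapter then passes the flattened strings to its provider-specific embeddings call.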
BedrockInferenceAdapter:

```diff
@@ -10,21 +10,24 @@ import uuid
 from botocore.client import BaseClient
 from llama_models.datatypes import CoreModelId
 from llama_models.llama3.api.chat_format import ChatFormat
 from llama_models.llama3.api.datatypes import ToolParamDefinition
 from llama_models.llama3.api.tokenizer import Tokenizer
 from llama_stack.providers.utils.inference.model_registry import (
     build_model_alias,
     ModelRegistryHelper,
 )
+from llama_stack.providers.utils.inference.prompt_adapter import (
+    content_has_media,
+    interleaved_content_as_str,
+)
 from llama_stack.apis.inference import *  # noqa: F403
 from llama_stack.providers.remote.inference.bedrock.config import BedrockConfig
 from llama_stack.providers.utils.bedrock.client import create_bedrock_client
-from llama_stack.providers.utils.inference.prompt_adapter import content_has_media
 MODEL_ALIASES = [
@@ -65,7 +68,7 @@ class BedrockInferenceAdapter(ModelRegistryHelper, Inference):
     async def completion(
         self,
         model_id: str,
-        content: InterleavedTextMedia,
+        content: InterleavedContent,
         sampling_params: Optional[SamplingParams] = SamplingParams(),
         response_format: Optional[ResponseFormat] = None,
         stream: Optional[bool] = False,
@@ -450,7 +453,7 @@ class BedrockInferenceAdapter(ModelRegistryHelper, Inference):
     async def embeddings(
         self,
         model_id: str,
-        contents: List[InterleavedTextMedia],
+        contents: List[InterleavedContent],
     ) -> EmbeddingsResponse:
         model = await self.model_store.get_model(model_id)
         embeddings = []
@@ -458,7 +461,7 @@ class BedrockInferenceAdapter(ModelRegistryHelper, Inference):
             assert not content_has_media(
                 content
             ), "Bedrock does not support media for embeddings"
-            input_text = interleaved_text_media_as_str(content)
+            input_text = interleaved_content_as_str(content)
             input_body = {"inputText": input_text}
             body = json.dumps(input_body)
             response = self.client.invoke_model(
```
CerebrasInferenceAdapter:

```diff
@@ -10,7 +10,6 @@ from cerebras.cloud.sdk import AsyncCerebras
 from llama_models.llama3.api.chat_format import ChatFormat
-from llama_models.llama3.api.datatypes import Message
 from llama_models.llama3.api.tokenizer import Tokenizer
 from llama_stack.apis.inference import *  # noqa: F403
@@ -70,7 +69,7 @@ class CerebrasInferenceAdapter(ModelRegistryHelper, Inference):
     async def completion(
         self,
         model_id: str,
-        content: InterleavedTextMedia,
+        content: InterleavedContent,
         sampling_params: Optional[SamplingParams] = SamplingParams(),
         response_format: Optional[ResponseFormat] = None,
         stream: Optional[bool] = False,
@@ -167,11 +166,11 @@ class CerebrasInferenceAdapter(ModelRegistryHelper, Inference):
             raise ValueError("`top_k` not supported by Cerebras")

         prompt = ""
-        if type(request) == ChatCompletionRequest:
+        if isinstance(request, ChatCompletionRequest):
             prompt = chat_completion_request_to_prompt(
                 request, self.get_llama_model(request.model), self.formatter
             )
-        elif type(request) == CompletionRequest:
+        elif isinstance(request, CompletionRequest):
             prompt = completion_request_to_prompt(request, self.formatter)
         else:
             raise ValueError(f"Unknown request type {type(request)}")
@@ -186,6 +185,6 @@ class CerebrasInferenceAdapter(ModelRegistryHelper, Inference):
     async def embeddings(
         self,
         model_id: str,
-        contents: List[InterleavedTextMedia],
+        contents: List[InterleavedContent],
     ) -> EmbeddingsResponse:
         raise NotImplementedError()
```
DatabricksInferenceAdapter:

```diff
@@ -10,7 +10,6 @@ from llama_models.datatypes import CoreModelId
 from llama_models.llama3.api.chat_format import ChatFormat
-from llama_models.llama3.api.datatypes import Message
 from llama_models.llama3.api.tokenizer import Tokenizer
 from openai import OpenAI
@@ -63,7 +62,7 @@ class DatabricksInferenceAdapter(ModelRegistryHelper, Inference):
     async def completion(
         self,
         model: str,
-        content: InterleavedTextMedia,
+        content: InterleavedContent,
         sampling_params: Optional[SamplingParams] = SamplingParams(),
         response_format: Optional[ResponseFormat] = None,
         stream: Optional[bool] = False,
@@ -136,6 +135,6 @@ class DatabricksInferenceAdapter(ModelRegistryHelper, Inference):
     async def embeddings(
         self,
         model: str,
-        contents: List[InterleavedTextMedia],
+        contents: List[InterleavedContent],
     ) -> EmbeddingsResponse:
         raise NotImplementedError()
```
FireworksInferenceAdapter:

```diff
@@ -10,7 +10,6 @@ from fireworks.client import Fireworks
 from llama_models.datatypes import CoreModelId
 from llama_models.llama3.api.chat_format import ChatFormat
-from llama_models.llama3.api.datatypes import Message
 from llama_models.llama3.api.tokenizer import Tokenizer
 from llama_stack.apis.inference import *  # noqa: F403
 from llama_stack.distribution.request_headers import NeedsRequestProviderData
@@ -19,6 +18,7 @@ from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
+    convert_message_to_openai_dict,
     get_sampling_options,
     process_chat_completion_response,
     process_chat_completion_stream_response,
@@ -29,7 +29,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
     chat_completion_request_to_prompt,
     completion_request_to_prompt,
     content_has_media,
-    convert_message_to_dict,
+    interleaved_content_as_str,
     request_has_media,
 )
@@ -108,7 +108,7 @@ class FireworksInferenceAdapter(
     async def completion(
         self,
         model_id: str,
-        content: InterleavedTextMedia,
+        content: InterleavedContent,
         sampling_params: Optional[SamplingParams] = SamplingParams(),
         response_format: Optional[ResponseFormat] = None,
         stream: Optional[bool] = False,
@@ -238,7 +238,7 @@ class FireworksInferenceAdapter(
         if isinstance(request, ChatCompletionRequest):
             if media_present:
                 input_dict["messages"] = [
-                    await convert_message_to_dict(m) for m in request.messages
+                    await convert_message_to_openai_dict(m) for m in request.messages
                 ]
             else:
                 input_dict["prompt"] = chat_completion_request_to_prompt(
@@ -265,7 +265,7 @@ class FireworksInferenceAdapter(
     async def embeddings(
         self,
         model_id: str,
-        contents: List[InterleavedTextMedia],
+        contents: List[InterleavedContent],
     ) -> EmbeddingsResponse:
         model = await self.model_store.get_model(model_id)
@@ -277,7 +277,7 @@ class FireworksInferenceAdapter(
         ), "Fireworks does not support media for embeddings"
         response = self._get_client().embeddings.create(
             model=model.provider_resource_id,
-            input=[interleaved_text_media_as_str(content) for content in contents],
+            input=[interleaved_content_as_str(content) for content in contents],
             **kwargs,
         )
```
NVIDIAInferenceAdapter:

```diff
@@ -8,14 +8,7 @@ import warnings
 from typing import AsyncIterator, List, Optional, Union
 from llama_models.datatypes import SamplingParams
-from llama_models.llama3.api.datatypes import (
-    ImageMedia,
-    InterleavedTextMedia,
-    Message,
-    ToolChoice,
-    ToolDefinition,
-    ToolPromptFormat,
-)
+from llama_models.llama3.api.datatypes import ToolDefinition, ToolPromptFormat
 from llama_models.sku_list import CoreModelId
 from openai import APIConnectionError, AsyncOpenAI
@@ -28,13 +21,17 @@ from llama_stack.apis.inference import (
     CompletionResponseStreamChunk,
     EmbeddingsResponse,
     Inference,
+    InterleavedContent,
     LogProbConfig,
+    Message,
     ResponseFormat,
+    ToolChoice,
 )
 from llama_stack.providers.utils.inference.model_registry import (
     build_model_alias,
     ModelRegistryHelper,
 )
+from llama_stack.providers.utils.inference.prompt_adapter import content_has_media
 from . import NVIDIAConfig
 from .openai_utils import (
@@ -123,17 +120,14 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
     async def completion(
         self,
         model_id: str,
-        content: InterleavedTextMedia,
+        content: InterleavedContent,
         sampling_params: Optional[SamplingParams] = SamplingParams(),
         response_format: Optional[ResponseFormat] = None,
         stream: Optional[bool] = False,
         logprobs: Optional[LogProbConfig] = None,
     ) -> Union[CompletionResponse, AsyncIterator[CompletionResponseStreamChunk]]:
-        if isinstance(content, ImageMedia) or (
-            isinstance(content, list)
-            and any(isinstance(c, ImageMedia) for c in content)
-        ):
-            raise NotImplementedError("ImageMedia is not supported")
+        if content_has_media(content):
+            raise NotImplementedError("Media is not supported")

         await check_health(self._config)  # this raises errors
@@ -165,7 +159,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
     async def embeddings(
         self,
         model_id: str,
-        contents: List[InterleavedTextMedia],
+        contents: List[InterleavedContent],
     ) -> EmbeddingsResponse:
         raise NotImplementedError()
```
OllamaInferenceAdapter:

```diff
@@ -11,7 +11,6 @@ import httpx
 from llama_models.datatypes import CoreModelId
 from llama_models.llama3.api.chat_format import ChatFormat
-from llama_models.llama3.api.datatypes import Message
 from llama_models.llama3.api.tokenizer import Tokenizer
 from ollama import AsyncClient
@@ -22,8 +21,8 @@ from llama_stack.providers.utils.inference.model_registry import (
 )

 from llama_stack.apis.inference import *  # noqa: F403
+from llama_stack.apis.common.content_types import ImageContentItem, TextContentItem
 from llama_stack.providers.datatypes import ModelsProtocolPrivate
 from llama_stack.providers.utils.inference.openai_compat import (
     get_sampling_options,
     OpenAICompatCompletionChoice,
@@ -37,7 +36,8 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
     chat_completion_request_to_prompt,
     completion_request_to_prompt,
     content_has_media,
-    convert_image_media_to_url,
+    convert_image_content_to_url,
+    interleaved_content_as_str,
     request_has_media,
 )
@@ -89,7 +89,7 @@ model_aliases = [
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
     build_model_alias_with_just_provider_model_id(
-        "llama3.2-vision",
+        "llama3.2-vision:latest",
         CoreModelId.llama3_2_11b_vision_instruct.value,
     ),
     build_model_alias(
@@ -141,7 +141,7 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
     async def completion(
         self,
         model_id: str,
-        content: InterleavedTextMedia,
+        content: InterleavedContent,
         sampling_params: Optional[SamplingParams] = SamplingParams(),
         response_format: Optional[ResponseFormat] = None,
         stream: Optional[bool] = False,
@@ -234,7 +234,7 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
         if isinstance(request, ChatCompletionRequest):
             if media_present:
                 contents = [
-                    await convert_message_to_dict_for_ollama(m)
+                    await convert_message_to_openai_dict_for_ollama(m)
                     for m in request.messages
                 ]
                 # flatten the list of lists
@@ -320,7 +320,7 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
     async def embeddings(
         self,
         model_id: str,
-        contents: List[InterleavedTextMedia],
+        contents: List[InterleavedContent],
     ) -> EmbeddingsResponse:
         model = await self.model_store.get_model(model_id)
@@ -329,7 +329,7 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
         ), "Ollama does not support media for embeddings"
         response = await self.client.embed(
             model=model.provider_resource_id,
-            input=[interleaved_text_media_as_str(content) for content in contents],
+            input=[interleaved_content_as_str(content) for content in contents],
         )
         embeddings = response["embeddings"]
@@ -358,21 +358,23 @@ class OllamaInferenceAdapter(Inference, ModelsProtocolPrivate):
         return model


-async def convert_message_to_dict_for_ollama(message: Message) -> List[dict]:
+async def convert_message_to_openai_dict_for_ollama(message: Message) -> List[dict]:
     async def _convert_content(content) -> dict:
-        if isinstance(content, ImageMedia):
+        if isinstance(content, ImageContentItem):
             return {
                 "role": message.role,
                 "images": [
-                    await convert_image_media_to_url(
+                    await convert_image_content_to_url(
                         content, download=True, include_format=False
                     )
                 ],
             }
         else:
+            text = content.text if isinstance(content, TextContentItem) else content
+            assert isinstance(text, str)
             return {
                 "role": message.role,
-                "content": content,
+                "content": text,
             }

     if isinstance(message.content, list):
```
_HfAdapter:

```diff
@@ -83,7 +83,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
     async def completion(
         self,
         model_id: str,
-        content: InterleavedTextMedia,
+        content: InterleavedContent,
         sampling_params: Optional[SamplingParams] = SamplingParams(),
         response_format: Optional[ResponseFormat] = None,
         stream: Optional[bool] = False,
@@ -267,7 +267,7 @@ class _HfAdapter(Inference, ModelsProtocolPrivate):
     async def embeddings(
         self,
         model_id: str,
-        contents: List[InterleavedTextMedia],
+        contents: List[InterleavedContent],
     ) -> EmbeddingsResponse:
         raise NotImplementedError()
```
TogetherInferenceAdapter:

```diff
@@ -10,7 +10,6 @@ from llama_models.datatypes import CoreModelId
 from llama_models.llama3.api.chat_format import ChatFormat
-from llama_models.llama3.api.datatypes import Message
 from llama_models.llama3.api.tokenizer import Tokenizer
 from together import Together
@@ -22,6 +21,7 @@ from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
+    convert_message_to_openai_dict,
     get_sampling_options,
     process_chat_completion_response,
     process_chat_completion_stream_response,
@@ -32,7 +32,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
     chat_completion_request_to_prompt,
     completion_request_to_prompt,
     content_has_media,
-    convert_message_to_dict,
+    interleaved_content_as_str,
     request_has_media,
 )
@@ -92,7 +92,7 @@ class TogetherInferenceAdapter(
     async def completion(
         self,
         model_id: str,
-        content: InterleavedTextMedia,
+        content: InterleavedContent,
         sampling_params: Optional[SamplingParams] = SamplingParams(),
         response_format: Optional[ResponseFormat] = None,
         stream: Optional[bool] = False,
@@ -230,7 +230,7 @@ class TogetherInferenceAdapter(
         if isinstance(request, ChatCompletionRequest):
             if media_present:
                 input_dict["messages"] = [
-                    await convert_message_to_dict(m) for m in request.messages
+                    await convert_message_to_openai_dict(m) for m in request.messages
                 ]
             else:
                 input_dict["prompt"] = chat_completion_request_to_prompt(
@@ -252,7 +252,7 @@ class TogetherInferenceAdapter(
     async def embeddings(
         self,
         model_id: str,
-        contents: List[InterleavedTextMedia],
+        contents: List[InterleavedContent],
     ) -> EmbeddingsResponse:
         model = await self.model_store.get_model(model_id)
         assert all(
@@ -260,7 +260,7 @@ class TogetherInferenceAdapter(
         ), "Together does not support media for embeddings"
         r = self._get_client().embeddings.create(
             model=model.provider_resource_id,
-            input=[interleaved_text_media_as_str(content) for content in contents],
+            input=[interleaved_content_as_str(content) for content in contents],
         )
         embeddings = [item.embedding for item in r.data]
         return EmbeddingsResponse(embeddings=embeddings)
```
VLLMInferenceAdapter:

```diff
@@ -8,7 +8,6 @@ import logging
 from typing import AsyncGenerator
 from llama_models.llama3.api.chat_format import ChatFormat
-from llama_models.llama3.api.datatypes import Message
 from llama_models.llama3.api.tokenizer import Tokenizer
 from llama_models.sku_list import all_registered_models
@@ -22,6 +21,7 @@ from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
 )
 from llama_stack.providers.utils.inference.openai_compat import (
+    convert_message_to_openai_dict,
     get_sampling_options,
     process_chat_completion_response,
     process_chat_completion_stream_response,
@@ -30,7 +30,7 @@ from llama_stack.providers.utils.inference.prompt_adapter import (
     chat_completion_request_to_prompt,
     completion_request_to_prompt,
     content_has_media,
-    convert_message_to_dict,
+    interleaved_content_as_str,
     request_has_media,
 )
@@ -71,7 +71,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
     async def completion(
         self,
         model_id: str,
-        content: InterleavedTextMedia,
+        content: InterleavedContent,
         sampling_params: Optional[SamplingParams] = SamplingParams(),
         response_format: Optional[ResponseFormat] = None,
         stream: Optional[bool] = False,
@@ -163,7 +163,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         if media_present:
             # vllm does not seem to work well with image urls, so we download the images
             input_dict["messages"] = [
-                await convert_message_to_dict(m, download=True)
+                await convert_message_to_openai_dict(m, download=True)
                 for m in request.messages
             ]
         else:
@@ -202,7 +202,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
     async def embeddings(
         self,
         model_id: str,
-        contents: List[InterleavedTextMedia],
+        contents: List[InterleavedContent],
     ) -> EmbeddingsResponse:
         model = await self.model_store.get_model(model_id)
@@ -215,7 +215,7 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
         ), "VLLM does not support media for embeddings"
         response = self.client.embeddings.create(
             model=model.provider_resource_id,
-            input=[interleaved_text_media_as_str(content) for content in contents],
+            input=[interleaved_content_as_str(content) for content in contents],
             **kwargs,
         )
```