implement embedding generation in supported inference providers

This commit is contained in:
Dinesh Yeduguru 2024-12-09 12:48:56 -08:00
parent b896be2311
commit e167e9eb93
16 changed files with 383 additions and 29 deletions

View file

@ -203,4 +203,14 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate):
model_id: str,
contents: List[InterleavedTextMedia],
) -> EmbeddingsResponse:
raise NotImplementedError()
model = await self.model_store.get_model(model_id)
kwargs = {}
if model.metadata.get("embedding_dimensions"):
kwargs["dimensions"] = model.metadata.get("embedding_dimensions")
response = self.client.embeddings.create(
model=model.provider_resource_id, input=contents, **kwargs
)
embeddings = [data.embedding for data in response.data]
return EmbeddingsResponse(embeddings=embeddings)