Mirror of https://github.com/meta-llama/llama-stack.git, synced 2025-12-12 12:06:04 +00:00
fix batch embeddings
parent e6428492e6, commit bbdde4ec12

1 changed file with 4 additions and 1 deletion
@@ -25,6 +25,7 @@ from llama_stack.apis.inference import (
     OpenAIChatCompletionRequestWithExtraBody,
     OpenAICompletionRequestWithExtraBody,
     OpenAIDeveloperMessageParam,
+    OpenAIEmbeddingsRequestWithExtraBody,
     OpenAIMessageParam,
     OpenAISystemMessageParam,
     OpenAIToolMessageParam,
@@ -640,7 +641,9 @@ class ReferenceBatchesImpl(Batches):
                 },
             }
         else:  # /v1/embeddings
-            embeddings_response = await self.inference_api.openai_embeddings(**request.body)
+            embeddings_response = await self.inference_api.openai_embeddings(
+                OpenAIEmbeddingsRequestWithExtraBody(**request.body)
+            )
             assert hasattr(embeddings_response, "model_dump_json"), (
                 "Embeddings response must have model_dump_json method"
             )
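The change is small: the batch processor previously splatted the stored request body directly into `openai_embeddings(**request.body)`; it now validates the body into the typed `OpenAIEmbeddingsRequestWithExtraBody` model and passes that single object, presumably matching the pattern suggested by the `OpenAIChatCompletionRequestWithExtraBody` and `OpenAICompletionRequestWithExtraBody` imports already present in the same module. Below is a minimal sketch of the resulting call pattern, not the provider's actual code; the helper function, the `body` dict, and the assumption that the request model accepts the standard OpenAI embeddings fields (`model`, `input`) are all illustrative.

```python
# Sketch only: shows the wrapped-request call pattern from this commit.
# run_embeddings_item and example_body are hypothetical; the real values come
# from the stored batch item (request.body).
from llama_stack.apis.inference import OpenAIEmbeddingsRequestWithExtraBody


async def run_embeddings_item(inference_api, body: dict):
    # Old call: await inference_api.openai_embeddings(**body)
    # New call: build a typed request object from the body, then pass it as a
    # single argument to the inference API.
    embeddings_request = OpenAIEmbeddingsRequestWithExtraBody(**body)
    return await inference_api.openai_embeddings(embeddings_request)


# Hypothetical batch item body, assuming OpenAI-style embeddings fields.
example_body = {"model": "some-embedding-model", "input": ["hello", "world"]}
```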