Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-17 21:47:33 +00:00)
Memory tests pass now

This commit is contained in:
parent e51154964f
commit 59ce047aea

23 changed files with 122 additions and 81 deletions
@@ -114,7 +114,7 @@ class VLLMInferenceImpl(Inference, ModelsProtocolPrivate):
     async def completion(
         self,
         model_id: str,
-        content: InterleavedTextMedia,
+        content: InterleavedContent,
         sampling_params: Optional[SamplingParams] = SamplingParams(),
         response_format: Optional[ResponseFormat] = None,
         stream: Optional[bool] = False,

@@ -218,8 +218,6 @@ class VLLMInferenceImpl(Inference, ModelsProtocolPrivate):
             yield chunk

     async def embeddings(
-        self, model_id: str, contents: list[InterleavedTextMedia]
+        self, model_id: str, contents: List[InterleavedContent]
     ) -> EmbeddingsResponse:
-        log.info("vLLM embeddings")
-        # TODO
         raise NotImplementedError()
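Two things change in the embeddings hunk: the content type moves from InterleavedTextMedia to InterleavedContent, and the annotation switches from the builtin generic list[...] to typing.List[...] (the PEP 585 builtin form only evaluates on Python 3.9+, so List presumably keeps the module importable on older interpreters). Below is a minimal sketch of the resulting signature; InterleavedContent and EmbeddingsResponse are stand-ins for llama-stack's real types, which are not visible in this hunk.

# Sketch of the post-change signature, under the assumption that the
# surrounding module imports List from typing. The type aliases here are
# placeholders, not llama-stack's actual definitions.
from typing import Any, List

InterleavedContent = Any   # stand-in for the real llama-stack type
EmbeddingsResponse = Any   # stand-in for the real response type


class VLLMInferenceImpl:
    async def embeddings(
        self, model_id: str, contents: List[InterleavedContent]
    ) -> EmbeddingsResponse:
        # Still a stub in this commit; the diff above only drops the
        # log line and TODO comment and updates the annotation.
        raise NotImplementedError()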