feat: use /v1/chat/completions for safety model inference

Matthew Farrellee 2025-09-28 05:22:49 -04:00
parent 65f7b81e98
commit c24a2d8929
72 changed files with 19613 additions and 3 deletions

@@ -291,12 +291,13 @@ class LlamaGuardShield:
         shield_input_message = self.build_text_shield_input(messages)
         # TODO: llama-stack inference protocol has issues with non-streaming inference code
-        response = await self.inference_api.chat_completion(
-            model_id=self.model,
+        response = await self.inference_api.openai_chat_completion(
+            model=self.model,
             messages=[shield_input_message],
             stream=False,
+            temperature=0.0001,  # TODO: find a better way
         )
-        content = response.completion_message.content
+        content = response.choices[0].message.content
         content = content.strip()
         return self.get_shield_response(content)
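
The switch above moves the safety check onto the OpenAI-compatible response shape, where the generated text lives at choices[0].message.content rather than completion_message.content. Below is a minimal sketch of that shape using a plain HTTP call to a /v1/chat/completions endpoint; the base URL, model id, and prompt are placeholders for illustration, not values taken from this commit.

import requests

BASE_URL = "http://localhost:8321/v1"  # hypothetical server address

resp = requests.post(
    f"{BASE_URL}/chat/completions",
    json={
        "model": "llama-guard",  # placeholder model id
        "messages": [{"role": "user", "content": "Is this message safe?"}],
        "stream": False,
        "temperature": 0.0001,  # near-zero, mirroring the TODO in the diff
    },
    timeout=30,
)
resp.raise_for_status()
data = resp.json()

# OpenAI-style responses nest the generated text under
# choices[0].message.content, unlike the previous
# response.completion_message.content shape.
content = data["choices"][0]["message"]["content"].strip()
print(content)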