refactor: move OpenAI compat utilities from nvidia to openai_compat (#1258)
# What does this PR do?

This PR:

- refactors the code that converts between Llama Stack and OpenAI-compatible servers, previously used only by the nvidia implementation, so it can be used more broadly. The next PRs in the stack will show usage.
- adds incremental tool-call parsing, for when tool calls are streamed incrementally rather than wholesale (see the sketch after the test results below).

## Test Plan

Run:

```bash
pytest -s -v -k nvidia llama_stack/providers/tests/inference/ --env NVIDIA_API_KEY=....
```

Text model tests pass (albeit without the completions tests):

```
test_text_inference.py::TestInference::test_model_list[-nvidia] PASSED
test_text_inference.py::TestInference::test_text_completion_non_streaming[-nvidia-inference:completion:non_streaming] FAILED
test_text_inference.py::TestInference::test_text_completion_streaming[-nvidia-inference:completion:streaming] FAILED
test_text_inference.py::TestInference::test_text_completion_logprobs_non_streaming[-nvidia-inference:completion:logprobs_non_streaming] FAILED
test_text_inference.py::TestInference::test_text_completion_logprobs_streaming[-nvidia-inference:completion:logprobs_streaming] FAILED
test_text_inference.py::TestInference::test_text_completion_structured_output[-nvidia-inference:completion:structured_output] FAILED
test_text_inference.py::TestInference::test_text_chat_completion_non_streaming[-nvidia-inference:chat_completion:sample_messages] PASSED
test_text_inference.py::TestInference::test_text_chat_completion_structured_output[-nvidia-inference:chat_completion:structured_output] PASSED
test_text_inference.py::TestInference::test_text_chat_completion_streaming[-nvidia-inference:chat_completion:sample_messages] PASSED
test_text_inference.py::TestInference::test_text_chat_completion_with_tool_calling[-nvidia-inference:chat_completion:sample_messages_tool_calling] PASSED
test_text_inference.py::TestInference::test_text_chat_completion_with_tool_calling_streaming[-nvidia-inference:chat_completion:sample_messages_tool_calling] PASSED
```

Vision model tests don't:

```
FAILED test_vision_inference.py::TestVisionModelInference::test_vision_chat_completion_non_streaming[-nvidia-image0-expected_strings0] - openai.BadRequestError: Error code: 400 - {'type': 'about:blank', 'status': 400, 'title': 'Bad Request', 'detail': 'Inference error'}
FAILED test_vision_inference.py::TestVisionModelInference::test_vision_chat_completion_non_streaming[-nvidia-image1-expected_strings1] - openai.BadRequestError: Error code: 400 - {'type': 'about:blank', 'status': 400, 'title': 'Bad Request', 'detail': 'Inference error'}
FAILED test_vision_inference.py::TestVisionModelInference::test_vision_chat_completion_streaming[-nvidia] - openai.BadRequestError: Error code: 400 - {'object': 'error', 'message': "[{'type': 'string_type', 'loc': ('body', 'messages', 1, 'content'), 'msg': 'Input should be a valid string', 'input': [{'image_url': {'url': 'https://raw.githubusercontent.com/meta-llama/llam...
```
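For context on the second bullet: OpenAI-compatible servers stream a tool call's id and function name once, then its JSON arguments as string fragments spread across many chunks, each delta keyed by a per-choice tool-call index. Incremental parsing folds those fragments into a per-index buffer as they arrive rather than waiting for the whole message. A minimal standalone sketch of that accumulation; the names (`ToolCallBuffer`, `accumulate_tool_calls`) and the flattened tuple shape are illustrative, not the helpers this PR adds:

```python
# Illustrative sketch of incremental tool-call accumulation over
# OpenAI-style streaming deltas. Not this PR's actual helper code.
import json
from dataclasses import dataclass


@dataclass
class ToolCallBuffer:
    call_id: str = ""
    name: str = ""
    arguments: str = ""  # raw JSON, accumulated fragment by fragment


def accumulate_tool_calls(deltas):
    """Fold streamed tool-call deltas into complete calls.

    `deltas` is an iterable of (index, call_id, name, arguments_fragment)
    tuples, mirroring the fields on chunk.choices[0].delta.tool_calls.
    """
    buffers: dict = {}
    for index, call_id, name, fragment in deltas:
        buf = buffers.setdefault(index, ToolCallBuffer())
        if call_id:
            buf.call_id = call_id  # only present on the first fragment
        if name:
            buf.name = name
        if fragment:
            buf.arguments += fragment
    # Parse each buffer's arguments only once the stream is done.
    return {
        i: (b.call_id, b.name, json.loads(b.arguments))
        for i, b in buffers.items()
    }


if __name__ == "__main__":
    stream = [
        (0, "call_1", "get_weather", ""),
        (0, None, None, '{"location": "San'),
        (0, None, None, ' Francisco", "unit": "celsius"}'),
    ]
    print(accumulate_tool_calls(stream))
    # {0: ('call_1', 'get_weather', {'location': 'San Francisco', 'unit': 'celsius'})}
```

In a real stream converter, the finished buffers would then be mapped onto Llama Stack tool-call events once their arguments parse as valid JSON.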
This commit is contained in:
parent 82799a55bb
commit b0310af177
5 changed files with 538 additions and 459 deletions
```diff
@@ -40,6 +40,10 @@ from llama_stack.models.llama.datatypes import (
 from llama_stack.providers.utils.inference.model_registry import (
     ModelRegistryHelper,
 )
+from llama_stack.providers.utils.inference.openai_compat import (
+    convert_openai_chat_completion_choice,
+    convert_openai_chat_completion_stream,
+)
 from llama_stack.providers.utils.inference.prompt_adapter import content_has_media
 
 from . import NVIDIAConfig
@@ -47,8 +51,6 @@ from .models import _MODEL_ENTRIES
 from .openai_utils import (
     convert_chat_completion_request,
     convert_completion_request,
-    convert_openai_chat_completion_choice,
-    convert_openai_chat_completion_stream,
     convert_openai_completion_choice,
     convert_openai_completion_stream,
 )
@@ -201,7 +203,7 @@ class NVIDIAInferenceAdapter(Inference, ModelRegistryHelper):
             raise ConnectionError(f"Failed to connect to NVIDIA NIM at {self._config.url}: {e}") from e
 
         if stream:
-            return convert_openai_chat_completion_stream(response)
+            return convert_openai_chat_completion_stream(response, enable_incremental_tool_calls=False)
         else:
             # we pass n=1 to get only one completion
             return convert_openai_chat_completion_choice(response.choices[0])
```
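The behavioral change in the last hunk: the shared converter now accepts an `enable_incremental_tool_calls` keyword, and the NVIDIA adapter pins it to `False` to keep its existing whole-call behavior. A hedged sketch of how another provider might opt in; only the two converter names and the keyword come from this diff, while `ExampleAdapter`, `_client`, and `_convert_request` are hypothetical scaffolding:

```python
# Hypothetical provider adapter opting in to incremental tool-call parsing.
# Only convert_openai_chat_completion_stream/_choice and the
# enable_incremental_tool_calls keyword appear in this diff; the rest is
# illustrative scaffolding.
from llama_stack.providers.utils.inference.openai_compat import (
    convert_openai_chat_completion_choice,
    convert_openai_chat_completion_stream,
)


class ExampleAdapter:
    async def chat_completion(self, request, stream: bool = False):
        # self._client is assumed to be an AsyncOpenAI-style client;
        # self._convert_request is a hypothetical request translator.
        response = await self._client.chat.completions.create(
            **self._convert_request(request), stream=stream
        )
        if stream:
            # Opt in: emit tool-call deltas as they arrive instead of
            # waiting for the whole call to finish streaming.
            return convert_openai_chat_completion_stream(
                response, enable_incremental_tool_calls=True
            )
        return convert_openai_chat_completion_choice(response.choices[0])
```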