forked from phoenix-oss/llama-stack-mirror
		
	fix: Make remote::vllm compatible with vLLM <= v0.6.3 (#1325)
# What does this PR do?

When a chat-completion request provides no tools, this PR sets `tool_choice` to `none`. This is consistent with the OpenAI API and supports vLLM <= v0.6.3, which rejects requests that set `tool_choice` without `tools`.

References:

* https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
* https://github.com/vllm-project/vllm/pull/10000

This fixes the following error when running older versions of vLLM:

```
00:50:19.834 [START] /v1/inference/chat-completion
INFO 2025-02-28 00:50:20,203 httpx:1025: HTTP Request: POST https://api-xeai-granite-3-1-8b-instruct.apps.int.stc.ai.preprod.us-east-1.aws.paas.redhat.com/v1/chat/completions "HTTP/1.1 400 Bad Request"
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/site-packages/llama_stack/distribution/server/server.py", line 235, in endpoint
    return await maybe_await(value)
  File "/usr/local/lib/python3.10/site-packages/llama_stack/distribution/server/server.py", line 201, in maybe_await
    return await value
  File "/usr/local/lib/python3.10/site-packages/llama_stack/providers/utils/telemetry/trace_protocol.py", line 89, in async_wrapper
    result = await method(self, *args, **kwargs)
  File "/usr/local/lib/python3.10/site-packages/llama_stack/distribution/routers/routers.py", line 193, in chat_completion
    return await provider.chat_completion(**params)
  File "/usr/local/lib/python3.10/site-packages/llama_stack/providers/utils/telemetry/trace_protocol.py", line 89, in async_wrapper
    result = await method(self, *args, **kwargs)
  File "/usr/local/lib/python3.10/site-packages/llama_stack/providers/remote/inference/vllm/vllm.py", line 286, in chat_completion
    return await self._nonstream_chat_completion(request, self.client)
  File "/usr/local/lib/python3.10/site-packages/llama_stack/providers/remote/inference/vllm/vllm.py", line 292, in _nonstream_chat_completion
    r = client.chat.completions.create(**params)
  File "/usr/local/lib/python3.10/site-packages/openai/_utils/_utils.py", line 279, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.10/site-packages/openai/resources/chat/completions/completions.py", line 879, in create
    return self._post(
  File "/usr/local/lib/python3.10/site-packages/openai/_base_client.py", line 1290, in post
    return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
  File "/usr/local/lib/python3.10/site-packages/openai/_base_client.py", line 967, in request
    return self._request(
  File "/usr/local/lib/python3.10/site-packages/openai/_base_client.py", line 1071, in _request
    raise self._make_status_error_from_response(err.response) from None
openai.BadRequestError: Error code: 400 - {'object': 'error', 'message': "[{'type': 'value_error', 'loc': ('body',), 'msg': 'Value error, When using `tool_choice`, `tools` must be set.', 'input': {'messages': [{'role': 'user', 'content': [{'type': 'text', 'text': 'What model are you?'}]}], 'model': 'granite-3-1-8b-instruct', 'max_tokens': 4096, 'stream': False, 'temperature': 0.0, 'tools': None, 'tool_choice': 'auto'}, 'ctx': {'error': ValueError('When using `tool_choice`, `tools` must be set.')}}]", 'type': 'BadRequestError', 'param': None, 'code': 400}
INFO: 2600:1700:9d20:ac0::49:59736 - "POST /v1/inference/chat-completion HTTP/1.1" 500 Internal Server Error
00:50:20.266 [END] /v1/inference/chat-completion [StatusCode.OK] (431.99ms)
```

## Test Plan

All existing tests pass.

---------

Signed-off-by: Yuan Tang <terrytangyuan@gmail.com>
This commit is contained in:
		
							parent
							
								
									6fa257b475
								
							
						
					
					
						commit
						18ab1985da
					
				
					 1 changed file with 6 additions and 0 deletions
				
			
		|  | @ -270,6 +270,12 @@ class VLLMInferenceAdapter(Inference, ModelsProtocolPrivate): | |||
|         tool_config: Optional[ToolConfig] = None, | ||||
|     ) -> AsyncGenerator: | ||||
|         model = await self.model_store.get_model(model_id) | ||||
|         # This is to be consistent with OpenAI API and support vLLM <= v0.6.3 | ||||
|         # References: | ||||
|         #   * https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice | ||||
|         #   * https://github.com/vllm-project/vllm/pull/10000 | ||||
|         if not tools and tool_config is not None: | ||||
|             tool_config.tool_choice = ToolChoice.none | ||||
|         request = ChatCompletionRequest( | ||||
|             model=model.provider_resource_id, | ||||
|             messages=messages, | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue