forked from phoenix-oss/llama-stack-mirror
		
	fix: chat completion with more than one choice (#2288)
# What does this PR do?

Fix a bug in openai_compat where choices are not indexed correctly.

## Test Plan

Added a new test.

Rerun the failed inference_store tests:

```
llama stack run fireworks --image-type conda
pytest -s -v tests/integration/ --stack-config http://localhost:8321 -k 'test_inference_store' --text-model meta-llama/Llama-3.3-70B-Instruct --count 10
```
This commit is contained in:
		
							parent
							
								
									1d46f3102e
								
							
						
					
					
						commit
						0b695538af
					
				
					 2 changed files with 53 additions and 11 deletions
				
			
		|  | @ -1402,9 +1402,8 @@ class OpenAIChatCompletionToLlamaStackMixin: | |||
|         outstanding_responses: list[Awaitable[AsyncIterator[ChatCompletionResponseStreamChunk]]], | ||||
|     ): | ||||
|         id = f"chatcmpl-{uuid.uuid4()}" | ||||
|         for outstanding_response in outstanding_responses: | ||||
|         for i, outstanding_response in enumerate(outstanding_responses): | ||||
|             response = await outstanding_response | ||||
|             i = 0 | ||||
|             async for chunk in response: | ||||
|                 event = chunk.event | ||||
|                 finish_reason = _convert_stop_reason_to_openai_finish_reason(event.stop_reason) | ||||
|  | @ -1459,7 +1458,6 @@ class OpenAIChatCompletionToLlamaStackMixin: | |||
|                             model=model, | ||||
|                             object="chat.completion.chunk", | ||||
|                         ) | ||||
|                 i = i + 1 | ||||
| 
 | ||||
|     async def _process_non_stream_response( | ||||
|         self, model: str, outstanding_responses: list[Awaitable[ChatCompletionResponse]] | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue