forked from phoenix-oss/llama-stack-mirror
		
This PR begins the process of supporting non-Llama models within Llama Stack. We start simple by adding support for this functionality within a few existing providers: fireworks, together and ollama.

## Test Plan

```bash
LLAMA_STACK_CONFIG=fireworks pytest -s -v tests/client-sdk/inference/test_text_inference.py \
  --inference-model accounts/fireworks/models/phi-3-vision-128k-instruct
```

^ This passes most of the tests but, as expected, fails the tool-calling tests, since they are very specific to Llama models.

```
inference/test_text_inference.py::test_text_completion_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct] PASSED
inference/test_text_inference.py::test_completion_log_probs_non_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct] PASSED
inference/test_text_inference.py::test_completion_log_probs_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct] PASSED
inference/test_text_inference.py::test_text_completion_structured_output[accounts/fireworks/models/phi-3-vision-128k-instruct-completion-01] PASSED
inference/test_text_inference.py::test_text_chat_completion_non_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct-Which planet do humans live on?-Earth] PASSED
inference/test_text_inference.py::test_text_chat_completion_non_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct-Which planet has rings around it with a name starting with letter S?-Saturn] PASSED
inference/test_text_inference.py::test_text_chat_completion_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct-What's the name of the Sun in latin?-Sol] PASSED
inference/test_text_inference.py::test_text_chat_completion_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct-What is the name of the US captial?-Washington] PASSED
inference/test_text_inference.py::test_text_chat_completion_with_tool_calling_and_non_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct] FAILED
inference/test_text_inference.py::test_text_chat_completion_with_tool_calling_and_streaming[accounts/fireworks/models/phi-3-vision-128k-instruct] FAILED
inference/test_text_inference.py::test_text_chat_completion_with_tool_choice_required[accounts/fireworks/models/phi-3-vision-128k-instruct] FAILED
inference/test_text_inference.py::test_text_chat_completion_with_tool_choice_none[accounts/fireworks/models/phi-3-vision-128k-instruct] PASSED
inference/test_text_inference.py::test_text_chat_completion_structured_output[accounts/fireworks/models/phi-3-vision-128k-instruct] ERROR
inference/test_text_inference.py::test_text_chat_completion_tool_calling_tools_not_in_request[accounts/fireworks/models/phi-3-vision-128k-instruct-True] PASSED
inference/test_text_inference.py::test_text_chat_completion_tool_calling_tools_not_in_request[accounts/fireworks/models/phi-3-vision-128k-instruct-False] PASSED
```
		
			
				
	
	
		
			123 lines
		
	
	
	
		
			3.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			123 lines
		
	
	
	
		
			3.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # Copyright (c) Meta Platforms, Inc. and affiliates.
 | |
| # All rights reserved.
 | |
| #
 | |
| # This source code is licensed under the terms described in the LICENSE file in
 | |
| # the root directory of this source tree.
 | |
| 
 | |
| import base64
 | |
| import pathlib
 | |
| 
 | |
| import pytest
 | |
| 
 | |
| 
 | |
@pytest.fixture
def image_path():
    """Return the path of the ``dog.png`` sample image stored beside this test module."""
    module_dir = pathlib.Path(__file__).parent
    return module_dir / "dog.png"
 | |
| 
 | |
| 
 | |
@pytest.fixture
def base64_image_data(image_path):
    """Return the bytes of the file at ``image_path`` as a base64 string decoded as UTF-8."""
    raw_bytes = image_path.read_bytes()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
 | |
| 
 | |
| 
 | |
@pytest.fixture
def base64_image_url(base64_image_data, image_path):
    """Return a ``data:image/<ext>;base64,...`` URI wrapping the encoded sample image."""
    # Path.suffix carries the leading dot (e.g. ".png"); strip it to get the bare extension.
    extension = image_path.suffix[1:]
    return f"data:image/{extension};base64,{base64_image_data}"
 | |
| 
 | |
| 
 | |
def test_image_chat_completion_non_streaming(client_with_models, vision_model_id):
    """Send an image URL plus a text prompt without streaming and check the reply mentions a dog.

    The request is issued through ``client_with_models.inference.chat_completion``
    with ``stream=False``; the lower-cased, stripped completion text must be
    non-empty and contain one of "dog", "puppy" or "pup".
    """
    image_part = {
        "type": "image",
        "image": {
            "url": {
                "uri": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/client-sdk/inference/dog.png"
            },
        },
    }
    prompt_part = {
        "type": "text",
        "text": "Describe what is in this image.",
    }
    user_message = {"role": "user", "content": [image_part, prompt_part]}

    result = client_with_models.inference.chat_completion(
        model_id=vision_model_id,
        messages=[user_message],
        stream=False,
    )

    # Normalise case and surrounding whitespace before the keyword search.
    reply = result.completion_message.content.lower().strip()
    assert len(reply) > 0
    assert any(keyword in reply for keyword in {"dog", "puppy", "pup"})
 | |
| 
 | |
| 
 | |
def test_image_chat_completion_streaming(client_with_models, vision_model_id):
    """Send an image URL plus a text prompt with streaming and check the reply mentions a dog.

    The request is issued with ``stream=True``; the lower-cased text of every
    chunk's ``event.delta.text`` is concatenated, and the result must be
    non-empty and contain one of "dog", "puppy" or "pup".
    """
    image_part = {
        "type": "image",
        "image": {
            "url": {
                "uri": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/client-sdk/inference/dog.png"
            },
        },
    }
    prompt_part = {
        "type": "text",
        "text": "Describe what is in this image.",
    }
    user_message = {"role": "user", "content": [image_part, prompt_part]}

    stream = client_with_models.inference.chat_completion(
        model_id=vision_model_id,
        messages=[user_message],
        stream=True,
    )

    # Stitch the per-chunk text deltas together into the full (lower-cased) reply.
    full_text = "".join(piece.event.delta.text.lower() for piece in stream)
    assert len(full_text) > 0
    assert any(keyword in full_text for keyword in {"dog", "puppy", "pup"})
 | |
| 
 | |
| 
 | |
@pytest.mark.parametrize("type_", ["url", "data"])
def test_image_chat_completion_base64(client_with_models, vision_model_id, base64_image_data, base64_image_url, type_):
    """Send the base64-encoded image, as a data URI or as raw data, and expect a non-empty reply.

    ``type_`` selects how the image is attached: ``"url"`` passes
    ``base64_image_url`` as the image URI, ``"data"`` passes
    ``base64_image_data`` in the image's ``data`` field.
    """
    as_url = {
        "type": "image",
        "image": {
            "url": {
                "uri": base64_image_url,
            },
        },
    }
    as_data = {
        "type": "image",
        "image": {
            "data": base64_image_data,
        },
    }
    # Both variants are built up front; the parametrized key picks one.
    image_part = {"url": as_url, "data": as_data}[type_]

    prompt_part = {
        "type": "text",
        "text": "Describe what is in this image.",
    }
    user_message = {"role": "user", "content": [image_part, prompt_part]}

    result = client_with_models.inference.chat_completion(
        model_id=vision_model_id,
        messages=[user_message],
        stream=False,
    )
    reply = result.completion_message.content.lower().strip()
    assert len(reply) > 0
 |