mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-08-03 01:03:59 +00:00)

commit c8580d3b0c (parent 6ec9eabbeb)
Apply formatting to source files
2 changed files with 15 additions and 12 deletions
@@ -88,23 +88,25 @@ class VLLMInferenceImpl(Inference, ModelsProtocolPrivate):
         """
         Callback that is called when the server associates an inference endpoint
         with an inference provider.

         :param model: Object that encapsulates parameters necessary for identifying
             a specific LLM.

         :returns: The input ``Model`` object. It may or may not be permissible
             to change fields before returning this object.
         """
         log.info(f"Registering model {model.identifier} with vLLM inference provider.")
         # The current version of this provider is hard-coded to serve only
         # the model specified in the YAML config file.
         configured_model = resolve_model(self.config.model)
         registered_model = resolve_model(model.model_id)

         if configured_model.core_model_id != registered_model.core_model_id:
-            raise ValueError(f"Requested model '{model.identifier}' is different from "
-                             f"model '{self.config.model}' that this provider "
-                             f"is configured to serve")
+            raise ValueError(
+                f"Requested model '{model.identifier}' is different from "
+                f"model '{self.config.model}' that this provider "
+                f"is configured to serve"
+            )
         return model

     def _sampling_params(self, sampling_params: SamplingParams) -> VLLMSamplingParams:
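The docstring above spells out the register_model contract: the provider accepts registration only for the model it was configured to serve, raises otherwise, and on success returns the input Model object. The following is a minimal, self-contained sketch of that contract; the ConfiguredProvider class and the plain string comparison are illustrative assumptions, not the provider's actual code (which resolves both identifiers through resolve_model and compares core_model_id).

# Illustrative sketch of the register_model contract (hypothetical names).
import asyncio
from dataclasses import dataclass


@dataclass
class Model:
    identifier: str


class ConfiguredProvider:
    def __init__(self, configured_model: str) -> None:
        self.configured_model = configured_model

    async def register_model(self, model: Model) -> Model:
        # Accept only the model this provider was configured for.
        if model.identifier != self.configured_model:
            raise ValueError(
                f"Requested model '{model.identifier}' is different from "
                f"model '{self.configured_model}' that this provider "
                f"is configured to serve"
            )
        # Return the input Model object, as the docstring specifies.
        return model


async def demo() -> None:
    provider = ConfiguredProvider("Llama3.1-8B-Instruct")
    ok = await provider.register_model(Model("Llama3.1-8B-Instruct"))
    print(ok)  # matching model: registration succeeds and returns the object
    try:
        await provider.register_model(Model("Llama3.1-70B-Instruct"))
    except ValueError as err:
        print(err)  # mismatching model: registration is rejected


asyncio.run(demo())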
@@ -169,8 +171,9 @@ class VLLMInferenceImpl(Inference, ModelsProtocolPrivate):
         log.info("Sampling params: %s", sampling_params)
         request_id = _random_uuid()

-        prompt = await chat_completion_request_to_prompt(request, self.config.model,
-                                                          self.formatter)
+        prompt = await chat_completion_request_to_prompt(
+            request, self.config.model, self.formatter
+        )
         vllm_sampling_params = self._sampling_params(request.sampling_params)
         results_generator = self.engine.generate(
             prompt, vllm_sampling_params, request_id
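In the hunk above, the request is turned into a prompt, the sampling parameters are converted to vLLM's format, and engine.generate returns an asynchronous stream of progressively more complete results. The sketch below shows only the consumption pattern for such a stream, using a stand-in generator rather than the real vLLM engine; the function names are assumptions made for illustration.

# Illustrative consumption of an async results generator (stand-in engine).
import asyncio
from typing import AsyncIterator


async def fake_generate(prompt: str, request_id: str) -> AsyncIterator[str]:
    # Stand-in for engine.generate(): yields progressively longer completions.
    text = ""
    for token in ["Hello", ",", " world", "!"]:
        text += token
        await asyncio.sleep(0)  # pretend tokens arrive asynchronously
        yield text


async def run_request() -> None:
    latest = ""
    # Iterate the stream and keep the latest (most complete) result; a streaming
    # endpoint would instead convert each partial result into a response chunk.
    async for partial in fake_generate("Say hello", request_id="req-1"):
        latest = partial
    print(latest)  # "Hello, world!"


asyncio.run(run_request())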
@@ -226,7 +229,7 @@ class VLLMInferenceImpl(Inference, ModelsProtocolPrivate):
             stream, self.formatter
         ):
             yield chunk

     async def embeddings(
         self, model_id: str, contents: List[InterleavedContent]
     ) -> EmbeddingsResponse:
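The `yield chunk` above forwards every element of the converted response stream out of the enclosing async generator, so callers consume a single stream of chunks. A small generic sketch of that forwarding shape follows; the names and the string chunk type are illustrative, not the provider's actual types.

# Illustrative forwarding of one async stream through another generator.
import asyncio
from typing import AsyncIterator


async def converted_stream() -> AsyncIterator[str]:
    # Stand-in for the stream produced by the response-conversion helper.
    for chunk in ["chunk-1", "chunk-2", "chunk-3"]:
        yield chunk


async def chat_completion_stream() -> AsyncIterator[str]:
    # Re-yield each chunk so the caller sees one continuous async stream.
    async for chunk in converted_stream():
        yield chunk


async def consume() -> None:
    async for chunk in chat_completion_stream():
        print(chunk)


asyncio.run(consume())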
@@ -292,6 +292,6 @@ async def inference_stack(request, inference_model):

     # Pytest yield fixture; see https://docs.pytest.org/en/stable/how-to/fixtures.html#yield-fixtures-recommended
     yield test_stack.impls[Api.inference], test_stack.impls[Api.models]

     # Cleanup code that runs after test case completion
     await test_stack.impls[Api.inference].shutdown()
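The test hunk above relies on pytest's yield-fixture pattern (see the linked pytest docs): code before the yield is setup, the yielded value is handed to the test, and code after the yield runs as teardown once the test finishes. Below is a minimal synchronous sketch of that pattern; the names are illustrative, and the real fixture above is async and awaits shutdown() during teardown.

# Illustrative pytest yield fixture (synchronous stand-in for the async fixture above).
import pytest


@pytest.fixture
def fake_stack():
    # Setup: build the objects the test needs.
    inference_impl = {"name": "inference"}
    models_impl = {"name": "models"}

    # The yielded value becomes the fixture argument inside the test.
    yield inference_impl, models_impl

    # Teardown: runs after the test completes, analogous to awaiting
    # inference_impl.shutdown() in the real fixture.
    inference_impl.clear()


def test_uses_fake_stack(fake_stack):
    inference_impl, models_impl = fake_stack
    assert inference_impl["name"] == "inference"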