Fix safety inference and the safety adapter for the new API spec. Pinned the llama_models version to 0.0.24, since the latest release (0.0.35) changed the model descriptor name. I was also hitting a missing-package error at runtime, so I added the dependency to requirements.txt.

Yogish Baliga 2024-09-25 14:14:15 -07:00
parent 53070e34a3
commit 9bb0c8f4fc
4 changed files with 33 additions and 26 deletions
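
For context on the pin described in the commit message, the requirements.txt change would amount to a line like the following (a minimal sketch; the rest of the file and the exact requirements diff are not shown in this commit view):

# requirements.txt (sketch; only the pin described in the commit message)
llama_models==0.0.24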


@@ -18,6 +18,7 @@ from llama_stack.apis.inference import *  # noqa: F403
 from llama_stack.providers.utils.inference.augment_messages import (
     augment_messages_for_tools,
 )
+from llama_stack.distribution.request_headers import get_request_provider_data
 
 from .config import TogetherImplConfig
@@ -97,6 +98,16 @@ class TogetherInferenceAdapter(Inference):
         stream: Optional[bool] = False,
         logprobs: Optional[LogProbConfig] = None,
     ) -> AsyncGenerator:
+        together_api_key = None
+        provider_data = get_request_provider_data()
+        if provider_data is None or not provider_data.together_api_key:
+            raise ValueError(
+                'Pass Together API Key in the header X-LlamaStack-ProviderData as { "together_api_key": <your api key>}'
+            )
+        together_api_key = provider_data.together_api_key
+
+        client = Together(api_key=together_api_key)
+
         # wrapper request to make it easier to pass around (internal only, not exposed to API)
         request = ChatCompletionRequest(
             model=model,
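
With this change the adapter no longer relies on a key baked into its config; every request must carry the Together key in the X-LlamaStack-ProviderData header, exactly as the ValueError message describes. A hedged sketch of what a caller might send (the endpoint URL, port, model name, and body shape are assumptions for illustration, not taken from this diff):

# Sketch: supplying the per-request Together key via the provider-data header.
# URL, port, model name, and payload shape are illustrative assumptions.
import json

import requests

response = requests.post(
    "http://localhost:5000/inference/chat_completion",  # assumed endpoint
    headers={
        "X-LlamaStack-ProviderData": json.dumps({"together_api_key": "<your api key>"}),
    },
    json={
        "model": "Meta-Llama3.1-8B-Instruct",  # assumed model identifier
        "messages": [{"role": "user", "content": "Hello"}],
    },
)
print(response.json())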
@@ -116,7 +127,7 @@ class TogetherInferenceAdapter(Inference):
         if not request.stream:
             # TODO: might need to add back an async here
-            r = self.client.chat.completions.create(
+            r = client.chat.completions.create(
                 model=together_model,
                 messages=self._messages_to_together_messages(messages),
                 stream=False,
@@ -151,7 +162,7 @@ class TogetherInferenceAdapter(Inference):
         ipython = False
         stop_reason = None
 
-        for chunk in self.client.chat.completions.create(
+        for chunk in client.chat.completions.create(
             model=together_model,
             messages=self._messages_to_together_messages(messages),
             stream=True,
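
The last two hunks swap the shared self.client (created once with a single configured key) for the client built per request from the header-supplied key. A minimal sketch of that pattern, assuming the together SDK's Together client as used above (the _together_client helper itself is hypothetical, not part of this diff):

# Sketch of the per-request client pattern these hunks adopt.
# get_request_provider_data comes from the import added in this commit;
# the _together_client helper is illustrative only.
from llama_stack.distribution.request_headers import get_request_provider_data
from together import Together


def _together_client() -> Together:
    provider_data = get_request_provider_data()
    if provider_data is None or not provider_data.together_api_key:
        raise ValueError("Missing together_api_key in X-LlamaStack-ProviderData")
    # Build a fresh client per request so each caller's key is honored,
    # instead of reusing one key captured at adapter construction time.
    return Together(api_key=provider_data.together_api_key)

Constructing the client per request trades a tiny allocation cost for correct per-caller credential handling.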