Support for Llama3.2 models and Swift SDK (#98)

Ashwin Bharambe 2024-09-25 10:29:58 -07:00 committed by GitHub
parent 95abbf576b
commit 56aed59eb4
56 changed files with 3745 additions and 630 deletions


@@ -21,7 +21,9 @@ from llama_stack.apis.inference import (
     ToolCallDelta,
     ToolCallParseStatus,
 )
-from llama_stack.providers.utils.inference.prepare_messages import prepare_messages
+from llama_stack.providers.utils.inference.augment_messages import (
+    augment_messages_for_tools,
+)
 from .config import MetaReferenceImplConfig
 from .model_parallel import LlamaModelParallelGenerator
@@ -57,7 +59,7 @@ class MetaReferenceInferenceImpl(Inference):
         model: str,
         messages: List[Message],
         sampling_params: Optional[SamplingParams] = SamplingParams(),
-        tools: Optional[List[ToolDefinition]] = [],
+        tools: Optional[List[ToolDefinition]] = None,
         tool_choice: Optional[ToolChoice] = ToolChoice.auto,
         tool_prompt_format: Optional[ToolPromptFormat] = ToolPromptFormat.json,
         stream: Optional[bool] = False,
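
The signature change above replaces a shared mutable default (`[]`) with None, the idiomatic way to express an optional list parameter in Python. A minimal sketch of the pitfall this avoids; the functions and names below are illustrative only and not part of the codebase:

# Illustrative only: why a mutable default argument is avoided.
from typing import List, Optional


def collect_with_mutable_default(item: str, bucket: List[str] = []) -> List[str]:
    # The single list created when the function is defined is shared by
    # every call that omits `bucket`, so state leaks between calls.
    bucket.append(item)
    return bucket


def collect_with_none_default(item: str, bucket: Optional[List[str]] = None) -> List[str]:
    # A fresh list is created on each call that omits `bucket`.
    bucket = bucket if bucket is not None else []
    bucket.append(item)
    return bucket


print(collect_with_mutable_default("a"))  # ['a']
print(collect_with_mutable_default("b"))  # ['a', 'b'] -- leaked from the first call
print(collect_with_none_default("a"))     # ['a']
print(collect_with_none_default("b"))     # ['b']
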
@@ -70,14 +72,14 @@ class MetaReferenceInferenceImpl(Inference):
             model=model,
             messages=messages,
             sampling_params=sampling_params,
-            tools=tools,
+            tools=tools or [],
             tool_choice=tool_choice,
             tool_prompt_format=tool_prompt_format,
             stream=stream,
             logprobs=logprobs,
         )
-        messages = prepare_messages(request)
+        messages = augment_messages_for_tools(request)
         model = resolve_model(request.model)
         if model is None:
             raise RuntimeError(
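
Taken together, the hunks rename the message pre-processing helper from prepare_messages to augment_messages_for_tools and normalize the optional tools argument with `tools or []` when the request is built. A rough sketch of that normalization pattern; the request class and helper below are hypothetical stand-ins, not the real llama_stack implementations:

# Hypothetical stand-ins to illustrate the `tools or []` normalization.
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class FakeRequest:
    model: str
    messages: List[str]
    tools: List[str] = field(default_factory=list)


def fake_augment_messages_for_tools(request: FakeRequest) -> List[str]:
    # Prepends a system-style hint when tools are present; purely illustrative.
    if request.tools:
        return [f"[tools available: {', '.join(request.tools)}]"] + request.messages
    return request.messages


def chat_completion_sketch(
    model: str,
    messages: List[str],
    tools: Optional[List[str]] = None,
) -> List[str]:
    # `tools or []` maps both None and an explicit empty list to a fresh
    # list, so the request always carries a concrete (possibly empty) list.
    request = FakeRequest(model=model, messages=messages, tools=tools or [])
    return fake_augment_messages_for_tools(request)


print(chat_completion_sketch("llama3.2", ["hello"]))
print(chat_completion_sketch("llama3.2", ["hello"], tools=["web_search"]))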