Repository: https://github.com/meta-llama/llama-stack.git (mirror, synced 2025-08-07 02:58:21 +00:00)

Merge branch 'main' into fix-base64
Commit baddcf910b: 10 changed files with 253 additions and 31 deletions
.github/workflows/tests.yml (vendored, new file, 69 additions)
@@ -0,0 +1,69 @@
+name: auto-tests
+
+on:
+  # pull_request:
+  workflow_dispatch:
+    inputs:
+      commit_sha:
+        description: 'Specific Commit SHA to trigger on'
+        required: false
+        default: $GITHUB_SHA # default to the last commit of $GITHUB_REF branch
+
+jobs:
+  test-llama-stack-as-library:
+    runs-on: ubuntu-latest
+    env:
+      TOGETHER_API_KEY: ${{ secrets.TOGETHER_API_KEY }}
+      FIREWORKS_API_KEY: ${{ secrets.FIREWORKS_API_KEY }}
+      TAVILY_SEARCH_API_KEY: ${{ secrets.TAVILY_SEARCH_API_KEY }}
+    strategy:
+      matrix:
+        provider: [fireworks, together]
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.inputs.commit_sha }}
+
+      - name: Echo commit SHA
+        run: |
+          echo "Triggered on commit SHA: ${{ github.event.inputs.commit_sha }}"
+          git rev-parse HEAD
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements.txt pytest
+          pip install -e .
+
+      - name: Build providers
+        run: |
+          llama stack build --template ${{ matrix.provider }} --image-type venv
+
+      - name: Install the latest llama-stack-client & llama-models packages
+        run: |
+          pip install -e git+https://github.com/meta-llama/llama-stack-client-python.git#egg=llama-stack-client
+          pip install -e git+https://github.com/meta-llama/llama-models.git#egg=llama-models
+
+      - name: Run client-sdk test
+        working-directory: "${{ github.workspace }}"
+        env:
+          REPORT_OUTPUT: md_report.md
+        shell: bash
+        run: |
+          pip install --upgrade pytest-md-report
+          echo "REPORT_FILE=${REPORT_OUTPUT}" >> "$GITHUB_ENV"
+
+          export INFERENCE_MODEL=meta-llama/Llama-3.1-8B-Instruct
+          LLAMA_STACK_CONFIG=./llama_stack/templates/${{ matrix.provider }}/run.yaml pytest --md-report --md-report-verbose=1 ./tests/client-sdk/inference/test_inference.py --md-report-output "$REPORT_OUTPUT"
+
+      - name: Output reports to the job summary
+        if: always()
+        shell: bash
+        run: |
+          if [ -f "$REPORT_FILE" ]; then
+            echo "<details><summary> Test Report for ${{ matrix.provider }} </summary>" >> $GITHUB_STEP_SUMMARY
+            echo "" >> $GITHUB_STEP_SUMMARY
+            cat "$REPORT_FILE" >> $GITHUB_STEP_SUMMARY
+            echo "" >> $GITHUB_STEP_SUMMARY
+            echo "</details>" >> $GITHUB_STEP_SUMMARY
+          fi
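Since the pull_request trigger above is commented out, auto-tests only runs when dispatched manually from the Actions tab or through the API. As a rough sketch of the API route, the Python below posts to GitHub's workflow-dispatch endpoint for this workflow file; the branch ref, the token environment variable, and the use of this page's commit SHA as the input are assumptions for illustration.

import os

import requests

# Dispatch the "auto-tests" workflow against a specific commit (sketch only).
url = "https://api.github.com/repos/meta-llama/llama-stack/actions/workflows/tests.yml/dispatches"
resp = requests.post(
    url,
    headers={
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}",  # assumes a token with workflow permissions
    },
    json={
        "ref": "main",  # branch that contains .github/workflows/tests.yml
        "inputs": {"commit_sha": "baddcf910b"},  # consumed by the checkout step's `ref`
    },
)
resp.raise_for_status()  # GitHub answers 204 No Content on success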
@@ -7,9 +7,9 @@ You can run a Llama Stack server in one of the following ways:
 This is the simplest way to get started. Using Llama Stack as a library means you do not need to start a server. This is especially useful when you are not running inference locally and relying on an external inference service (eg. fireworks, together, groq, etc.) See [Using Llama Stack as a Library](importing_as_library)
 
 
-**Docker**:
+**Container**:
 
-Another simple way to start interacting with Llama Stack is to just spin up docker which is pre-built with all the providers you need. We provide a number of pre-built Docker containers so you can start a Llama Stack server instantly. You can also build your own custom Docker container. Which distribution to choose depends on the hardware you have. See [Selection of a Distribution](distributions/selection) for more details.
+Another simple way to start interacting with Llama Stack is to just spin up a container (via Docker or Podman) which is pre-built with all the providers you need. We provide a number of pre-built images so you can start a Llama Stack server instantly. You can also build your own custom container. Which distribution to choose depends on the hardware you have. See [Selection of a Distribution](selection) for more details.
 
 
 **Conda**:
@@ -24,4 +24,5 @@ Lastly, if you have a custom or an advanced setup or you are developing on Llama
 importing_as_library
 building_distro
 configuration
+selection
 ```
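For the server-based options above, a minimal way to check that a distribution is up is to point the llama-stack-client SDK at it and list the registered models; the base URL and port below are assumptions and depend on how the server was started.

# Minimal sketch: talk to an already-running Llama Stack server (base_url is an assumption).
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")
for model in client.models.list():
    print(model.identifier)  # attribute name per llama-stack-client of this era; may differ by version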
@@ -44,7 +44,7 @@ The following models are available by default:
 
 ### Prerequisite: API Keys
 
-Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaBova.ai](https://sambanova.ai/).
+Make sure you have access to a SambaNova API Key. You can get one by visiting [SambaNova.ai](https://cloud.sambanova.ai/).
 
 
 ## Running Llama Stack with SambaNova
@@ -140,6 +140,10 @@ class StackRun(Subcommand):
             return
 
         def get_conda_prefix(env_name):
+            # Conda "base" environment does not end with "base" in the
+            # prefix, so should be handled separately.
+            if env_name == "base":
+                return os.environ.get("CONDA_PREFIX")
             # Get conda environments info
             conda_env_info = json.loads(
                 subprocess.check_output(
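The new branch above short-circuits the lookup for conda's base environment, whose prefix does not end in the environment name. A standalone sketch of the overall pattern follows; the exact conda invocation is an assumption, since the diff truncates the real command.

import json
import os
import subprocess


def get_conda_prefix(env_name: str):
    # "base" is special: its prefix does not end with the env name,
    # so read it straight from CONDA_PREFIX.
    if env_name == "base":
        return os.environ.get("CONDA_PREFIX")
    # Otherwise ask conda for its environments (command shown is an assumption).
    info = json.loads(subprocess.check_output(["conda", "info", "--envs", "--json"]))
    for prefix in info.get("envs", []):
        if os.path.basename(prefix) == env_name:
            return prefix
    return None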
@@ -6,7 +6,7 @@
 
 import json
 import warnings
-from typing import AsyncGenerator, Literal
+from typing import AsyncGenerator, Literal, Union
 
 from groq import Stream
 from groq.types.chat.chat_completion import ChatCompletion
@@ -30,6 +30,8 @@ from groq.types.shared.function_definition import FunctionDefinition
 
 from llama_models.llama3.api.datatypes import ToolParamDefinition
 
+from pydantic import BaseModel
+
 from llama_stack.apis.common.content_types import (
     TextDelta,
     ToolCallDelta,
@@ -150,15 +152,26 @@ def convert_chat_completion_response(
             _convert_groq_tool_call(tool_call)
             for tool_call in choice.message.tool_calls
         ]
-        return ChatCompletionResponse(
-            completion_message=CompletionMessage(
-                tool_calls=tool_calls,
-                stop_reason=StopReason.end_of_message,
-                # Content is not optional
-                content="",
-            ),
-            logprobs=None,
-        )
+        if any(isinstance(tool_call, UnparseableToolCall) for tool_call in tool_calls):
+            # If we couldn't parse a tool call, jsonify the tool calls and return them
+            return ChatCompletionResponse(
+                completion_message=CompletionMessage(
+                    stop_reason=StopReason.end_of_message,
+                    content=json.dumps(tool_calls, default=lambda x: x.model_dump()),
+                ),
+                logprobs=None,
+            )
+        else:
+            # Otherwise, return tool calls as normal
+            return ChatCompletionResponse(
+                completion_message=CompletionMessage(
+                    tool_calls=tool_calls,
+                    stop_reason=StopReason.end_of_message,
+                    # Content is not optional
+                    content="",
+                ),
+                logprobs=None,
+            )
     else:
         return ChatCompletionResponse(
             completion_message=CompletionMessage(
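The new branch serializes a possibly mixed list of tool calls into the message content. The key detail is json.dumps with a default hook that turns each pydantic object into a plain dict; a tiny standalone illustration (UnparseableToolCall here is a stand-in with the same fields as the class introduced in a later hunk):

import json

from pydantic import BaseModel


class UnparseableToolCall(BaseModel):
    call_id: str
    tool_name: str
    arguments: str


calls = [UnparseableToolCall(call_id="tool_call_id", tool_name="log", arguments="(number=10, base=2)")]
# default= lets json.dumps serialize objects it does not natively understand.
print(json.dumps(calls, default=lambda x: x.model_dump()))
# -> [{"call_id": "tool_call_id", "tool_name": "log", "arguments": "(number=10, base=2)"}]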
@@ -214,15 +227,27 @@ async def convert_chat_completion_response_stream(
 
             # We assume Groq produces fully formed tool calls for each chunk
             tool_call = _convert_groq_tool_call(choice.delta.tool_calls[0])
-            yield ChatCompletionResponseStreamChunk(
-                event=ChatCompletionResponseEvent(
-                    event_type=event_type,
-                    delta=ToolCallDelta(
-                        tool_call=tool_call,
-                        parse_status=ToolCallParseStatus.succeeded,
-                    ),
-                )
-            )
+            if isinstance(tool_call, ToolCall):
+                yield ChatCompletionResponseStreamChunk(
+                    event=ChatCompletionResponseEvent(
+                        event_type=event_type,
+                        delta=ToolCallDelta(
+                            tool_call=tool_call,
+                            parse_status=ToolCallParseStatus.succeeded,
+                        ),
+                    )
+                )
+            else:
+                # Otherwise it's an UnparseableToolCall - return the raw tool call
+                yield ChatCompletionResponseStreamChunk(
+                    event=ChatCompletionResponseEvent(
+                        event_type=event_type,
+                        delta=ToolCallDelta(
+                            tool_call=tool_call.model_dump_json(),
+                            parse_status=ToolCallParseStatus.failed,
+                        ),
+                    )
+                )
         else:
             yield ChatCompletionResponseStreamChunk(
                 event=ChatCompletionResponseEvent(
@@ -234,12 +259,35 @@ async def convert_chat_completion_response_stream(
             event_type = ChatCompletionResponseEventType.progress
 
 
-def _convert_groq_tool_call(tool_call: ChatCompletionMessageToolCall) -> ToolCall:
+class UnparseableToolCall(BaseModel):
+    """
+    A ToolCall with arguments that are not valid JSON.
+    Mirrors the ToolCall schema, but with arguments as a string.
+    """
+
+    call_id: str
+    tool_name: str
+    arguments: str
+
+
+def _convert_groq_tool_call(
+    tool_call: ChatCompletionMessageToolCall,
+) -> Union[ToolCall, UnparseableToolCall]:
+    """
+    Convert a Groq tool call to a ToolCall.
+    Returns an UnparseableToolCall if the tool call is not valid JSON.
+    """
+    try:
+        arguments = json.loads(tool_call.function.arguments)
+    except Exception as e:
+        return UnparseableToolCall(
+            call_id=tool_call.id,
+            tool_name=tool_call.function.name,
+            arguments=tool_call.function.arguments,
+        )
+
     return ToolCall(
         call_id=tool_call.id,
         tool_name=tool_call.function.name,
-        # Note that Groq may return a string that is not valid JSON here
-        # So this may raise a 500 error. Going to leave this as is to see
-        # how big of an issue this is and what we can do about it.
-        arguments=json.loads(tool_call.function.arguments),
+        arguments=arguments,
     )
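With the converter above, invalid JSON arguments no longer bubble up as a 500; the caller gets back either a real ToolCall or an UnparseableToolCall carrying the raw string. A self-contained sketch of the same fallback, using simplified stand-ins instead of the real llama_models/llama_stack types:

import json
from typing import Union

from pydantic import BaseModel


class ParsedToolCall(BaseModel):  # stand-in for llama_models' ToolCall
    call_id: str
    tool_name: str
    arguments: dict


class RawToolCall(BaseModel):  # stand-in for the UnparseableToolCall above
    call_id: str
    tool_name: str
    arguments: str


def convert_tool_call(call_id: str, name: str, raw_args: str) -> Union[ParsedToolCall, RawToolCall]:
    try:
        return ParsedToolCall(call_id=call_id, tool_name=name, arguments=json.loads(raw_args))
    except Exception:
        # Keep the raw text instead of raising, mirroring the change above.
        return RawToolCall(call_id=call_id, tool_name=name, arguments=raw_args)


print(convert_tool_call("1", "log", '{"number": 10, "base": 2}'))  # parsed
print(convert_tool_call("2", "log", "(number=10, base=2)"))        # falls back to the raw string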
@@ -57,6 +57,10 @@ MODEL_ALIASES = [
         "Meta-Llama-3.2-3B-Instruct",
         CoreModelId.llama3_2_3b_instruct.value,
     ),
+    build_model_alias(
+        "Meta-Llama-3.3-70B-Instruct",
+        CoreModelId.llama3_3_70b_instruct.value,
+    ),
     build_model_alias(
         "Llama-3.2-11B-Vision-Instruct",
         CoreModelId.llama3_2_11b_vision_instruct.value,
@@ -161,7 +161,10 @@ class TogetherInferenceAdapter(
             yield chunk
 
     def _build_options(
-        self, sampling_params: Optional[SamplingParams], fmt: ResponseFormat
+        self,
+        sampling_params: Optional[SamplingParams],
+        logprobs: Optional[LogProbConfig],
+        fmt: ResponseFormat,
     ) -> dict:
         options = get_sampling_options(sampling_params)
         if fmt:
@@ -175,6 +178,13 @@ class TogetherInferenceAdapter(
         else:
             raise ValueError(f"Unknown response format {fmt.type}")
 
+        if logprobs and logprobs.top_k:
+            if logprobs.top_k != 1:
+                raise ValueError(
+                    f"Unsupported value: Together only supports logprobs top_k=1. {logprobs.top_k} was provided",
+                )
+            options["logprobs"] = 1
+
         return options
 
     async def chat_completion(
@@ -263,7 +273,9 @@ class TogetherInferenceAdapter(
             "model": request.model,
             **input_dict,
             "stream": request.stream,
-            **self._build_options(request.sampling_params, request.response_format),
+            **self._build_options(
+                request.sampling_params, request.logprobs, request.response_format
+            ),
         }
 
     async def embeddings(
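These three hunks thread the request's logprobs config through to Together, which only supports returning the logprob of the sampled token (top_k=1); anything else now fails fast with a ValueError. A hedged usage sketch through the client SDK is below; the method and parameter names (model_id, logprobs as a dict with top_k) reflect the llama-stack-client API of this era and are assumptions that may differ in other versions.

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumes a running together distribution
response = client.inference.chat_completion(
    model_id="meta-llama/Llama-3.1-8B-Instruct",
    messages=[{"role": "user", "content": "Say hello."}],
    logprobs={"top_k": 1},  # the only value Together accepts; top_k=2 would hit the new ValueError
)
print(response.completion_message.content)
print(response.logprobs)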
@@ -23,6 +23,7 @@ from groq.types.chat.chat_completion_message_tool_call import (
 from groq.types.shared.function_definition import FunctionDefinition
 from llama_models.datatypes import GreedySamplingStrategy, TopPSamplingStrategy
 from llama_models.llama3.api.datatypes import ToolParamDefinition
+from llama_stack.apis.common.content_types import ToolCallParseStatus
 from llama_stack.apis.inference import (
     ChatCompletionRequest,
     ChatCompletionResponseEventType,
@@ -347,6 +348,26 @@ class TestConvertNonStreamChatCompletionResponse:
             ),
         ]
 
+    def test_converts_unparseable_tool_calls(self):
+        response = self._dummy_chat_completion_response_with_tool_call()
+        response.choices[0].message.tool_calls = [
+            ChatCompletionMessageToolCall(
+                id="tool_call_id",
+                type="function",
+                function=Function(
+                    name="log",
+                    arguments="(number=10, base=2)",
+                ),
+            ),
+        ]
+
+        converted = convert_chat_completion_response(response)
+
+        assert (
+            converted.completion_message.content
+            == '[{"call_id": "tool_call_id", "tool_name": "log", "arguments": "(number=10, base=2)"}]'
+        )
+
     def _dummy_chat_completion_response(self):
         return ChatCompletion(
             id="chatcmpl-123",
@@ -478,6 +499,40 @@ class TestConvertStreamChatCompletionResponse:
                 arguments={"origin": "AU", "destination": "LAX"},
             )
 
+    @pytest.mark.asyncio
+    async def test_returns_tool_calls_stream_with_unparseable_tool_calls(self):
+        def tool_call_stream():
+            chunk = self._dummy_chat_completion_chunk_with_tool_call()
+            chunk.choices[0].delta.tool_calls = [
+                ChoiceDeltaToolCall(
+                    index=0,
+                    type="function",
+                    id="tool_call_id",
+                    function=ChoiceDeltaToolCallFunction(
+                        name="get_flight_info",
+                        arguments="(origin=AU, destination=LAX)",
+                    ),
+                ),
+            ]
+            yield chunk
+
+            chunk = self._dummy_chat_completion_chunk_with_tool_call()
+            chunk.choices[0].delta.content = None
+            chunk.choices[0].finish_reason = "stop"
+            yield chunk
+
+        stream = tool_call_stream()
+        converted = convert_chat_completion_response_stream(stream)
+
+        iter = converted.__aiter__()
+        chunk = await iter.__anext__()
+        assert chunk.event.event_type == ChatCompletionResponseEventType.start
+        assert (
+            chunk.event.delta.content
+            == '{"call_id":"tool_call_id","tool_name":"get_flight_info","arguments":"(origin=AU, destination=LAX)"}'
+        )
+        assert chunk.event.delta.parse_status == ToolCallParseStatus.failed
+
     def _dummy_chat_completion_chunk(self):
         return ChatCompletionChunk(
             id="chatcmpl-123",
@@ -4,7 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
 
-from typing import AsyncGenerator, Dict, List, Optional
+from typing import AsyncGenerator, Dict, List, Optional, Union
 
 from llama_models.datatypes import (
     GreedySamplingStrategy,
@@ -121,7 +121,31 @@ def convert_openai_completion_logprobs(
 ) -> Optional[List[TokenLogProbs]]:
     if not logprobs:
         return None
-    return [TokenLogProbs(logprobs_by_token=x) for x in logprobs.top_logprobs]
+    if hasattr(logprobs, "top_logprobs"):
+        return [TokenLogProbs(logprobs_by_token=x) for x in logprobs.top_logprobs]
+
+    # Together supports logprobs with top_k=1 only. This means for each token position,
+    # they return only the logprobs for the selected token (vs. the top n most likely tokens).
+    # Here we construct the response by matching the selected token with the logprobs.
+    if logprobs.tokens and logprobs.token_logprobs:
+        return [
+            TokenLogProbs(logprobs_by_token={token: token_lp})
+            for token, token_lp in zip(logprobs.tokens, logprobs.token_logprobs)
+        ]
+    return None
+
+
+def convert_openai_completion_logprobs_stream(
+    text: str, logprobs: Optional[Union[float, OpenAICompatLogprobs]]
+):
+    if logprobs is None:
+        return None
+    if isinstance(logprobs, float):
+        # Adapt response from Together CompletionChoicesChunk
+        return [TokenLogProbs(logprobs_by_token={text: logprobs})]
+    if hasattr(logprobs, "top_logprobs"):
+        return [TokenLogProbs(logprobs_by_token=x) for x in logprobs.top_logprobs]
+    return None
 
 
 def process_completion_response(
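The non-stream path now recognizes Together's top_k=1 shape (parallel tokens and token_logprobs lists), and the new streaming helper additionally accepts a bare float attached to a chunk. A small self-contained sketch of both mappings, using a simplified stand-in for the logprobs payload and plain dicts instead of TokenLogProbs:

from dataclasses import dataclass
from typing import List, Optional, Union


@dataclass
class FakeLogprobs:  # stand-in for Together's logprobs payload (top_k=1)
    tokens: List[str]
    token_logprobs: List[float]


def per_token_dicts(logprobs: FakeLogprobs) -> List[dict]:
    # Pair each selected token with its logprob, one dict per position.
    return [{token: lp} for token, lp in zip(logprobs.tokens, logprobs.token_logprobs)]


def stream_chunk_logprobs(text: str, logprobs: Optional[Union[float, FakeLogprobs]]):
    if logprobs is None:
        return None
    if isinstance(logprobs, float):
        # Streaming chunks may carry a single float covering the chunk's text.
        return [{text: logprobs}]
    return per_token_dicts(logprobs)


print(per_token_dicts(FakeLogprobs(tokens=["Hello", "!"], token_logprobs=[-0.1, -0.5])))
print(stream_chunk_logprobs("Hello", -0.1))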
@@ -188,7 +212,7 @@ async def process_completion_stream_response(
         yield CompletionResponseStreamChunk(
             delta=text,
             stop_reason=stop_reason,
-            logprobs=convert_openai_completion_logprobs(choice.logprobs),
+            logprobs=convert_openai_completion_logprobs_stream(text, choice.logprobs),
         )
         if finish_reason:
             if finish_reason in ["stop", "eos", "eos_token"]:
@@ -116,6 +116,11 @@ models:
   provider_id: sambanova
   provider_model_id: Meta-Llama-3.2-3B-Instruct
   model_type: llm
+- metadata: {}
+  model_id: meta-llama/Llama-3.3-70B-Instruct
+  provider_id: sambanova
+  provider_model_id: Meta-Llama-3.3-70B-Instruct
+  model_type: llm
 - metadata: {}
   model_id: meta-llama/Llama-3.2-11B-Vision-Instruct
   provider_id: sambanova