Merge remote-tracking branch 'origin/main' into TamiTakamiya/tool-param-definition-update

Ashwin Bharambe 2025-09-27 11:24:11 -07:00
commit 27c5365f99
47 changed files with 49304 additions and 1334 deletions

View file

@@ -210,55 +210,6 @@
}
}
},
"/v1/inference/completion": {
"post": {
"responses": {
"200": {
"description": "If stream=False, returns a CompletionResponse with the full completion. If stream=True, returns an SSE event stream of CompletionResponseStreamChunk.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CompletionResponse"
}
},
"text/event-stream": {
"schema": {
"$ref": "#/components/schemas/CompletionResponseStreamChunk"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Inference"
],
"summary": "Generate a completion for the given content using the specified model.",
"description": "Generate a completion for the given content using the specified model.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CompletionRequest"
}
}
},
"required": true
}
}
},
"/v1/agents": {
"get": {
"responses": {
@@ -7324,126 +7275,6 @@
"title": "ToolCallDelta",
"description": "A tool call content delta for streaming responses."
},
"CompletionRequest": {
"type": "object",
"properties": {
"model_id": {
"type": "string",
"description": "The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint."
},
"content": {
"$ref": "#/components/schemas/InterleavedContent",
"description": "The content to generate a completion for."
},
"sampling_params": {
"$ref": "#/components/schemas/SamplingParams",
"description": "(Optional) Parameters to control the sampling strategy."
},
"response_format": {
"$ref": "#/components/schemas/ResponseFormat",
"description": "(Optional) Grammar specification for guided (structured) decoding."
},
"stream": {
"type": "boolean",
"description": "(Optional) If True, generate an SSE event stream of the response. Defaults to False."
},
"logprobs": {
"type": "object",
"properties": {
"top_k": {
"type": "integer",
"default": 0,
"description": "How many tokens (for each position) to return log probabilities for."
}
},
"additionalProperties": false,
"description": "(Optional) If specified, log probabilities for each token position will be returned."
}
},
"additionalProperties": false,
"required": [
"model_id",
"content"
],
"title": "CompletionRequest"
},
"CompletionResponse": {
"type": "object",
"properties": {
"metrics": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricInResponse"
},
"description": "(Optional) List of metrics associated with the API response"
},
"content": {
"type": "string",
"description": "The generated completion text"
},
"stop_reason": {
"type": "string",
"enum": [
"end_of_turn",
"end_of_message",
"out_of_tokens"
],
"description": "Reason why generation stopped"
},
"logprobs": {
"type": "array",
"items": {
"$ref": "#/components/schemas/TokenLogProbs"
},
"description": "Optional log probabilities for generated tokens"
}
},
"additionalProperties": false,
"required": [
"content",
"stop_reason"
],
"title": "CompletionResponse",
"description": "Response from a completion request."
},
"CompletionResponseStreamChunk": {
"type": "object",
"properties": {
"metrics": {
"type": "array",
"items": {
"$ref": "#/components/schemas/MetricInResponse"
},
"description": "(Optional) List of metrics associated with the API response"
},
"delta": {
"type": "string",
"description": "New content generated since last chunk. This can be one or more tokens."
},
"stop_reason": {
"type": "string",
"enum": [
"end_of_turn",
"end_of_message",
"out_of_tokens"
],
"description": "Optional reason why generation stopped, if complete"
},
"logprobs": {
"type": "array",
"items": {
"$ref": "#/components/schemas/TokenLogProbs"
},
"description": "Optional log probabilities for generated tokens"
}
},
"additionalProperties": false,
"required": [
"delta"
],
"title": "CompletionResponseStreamChunk",
"description": "A chunk of a streamed completion response."
},
"AgentConfig": {
"type": "object",
"properties": {

View file

@@ -132,43 +132,6 @@ paths:
schema:
$ref: '#/components/schemas/ChatCompletionRequest'
required: true
/v1/inference/completion:
post:
responses:
'200':
description: >-
If stream=False, returns a CompletionResponse with the full completion.
If stream=True, returns an SSE event stream of CompletionResponseStreamChunk.
content:
application/json:
schema:
$ref: '#/components/schemas/CompletionResponse'
text/event-stream:
schema:
$ref: '#/components/schemas/CompletionResponseStreamChunk'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Inference
summary: >-
Generate a completion for the given content using the specified model.
description: >-
Generate a completion for the given content using the specified model.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CompletionRequest'
required: true
/v1/agents:
get:
responses:
@@ -5302,112 +5265,6 @@ components:
title: ToolCallDelta
description: >-
A tool call content delta for streaming responses.
CompletionRequest:
type: object
properties:
model_id:
type: string
description: >-
The identifier of the model to use. The model must be registered with
Llama Stack and available via the /models endpoint.
content:
$ref: '#/components/schemas/InterleavedContent'
description: >-
The content to generate a completion for.
sampling_params:
$ref: '#/components/schemas/SamplingParams'
description: >-
(Optional) Parameters to control the sampling strategy.
response_format:
$ref: '#/components/schemas/ResponseFormat'
description: >-
(Optional) Grammar specification for guided (structured) decoding.
stream:
type: boolean
description: >-
(Optional) If True, generate an SSE event stream of the response. Defaults
to False.
logprobs:
type: object
properties:
top_k:
type: integer
default: 0
description: >-
How many tokens (for each position) to return log probabilities for.
additionalProperties: false
description: >-
(Optional) If specified, log probabilities for each token position will
be returned.
additionalProperties: false
required:
- model_id
- content
title: CompletionRequest
CompletionResponse:
type: object
properties:
metrics:
type: array
items:
$ref: '#/components/schemas/MetricInResponse'
description: >-
(Optional) List of metrics associated with the API response
content:
type: string
description: The generated completion text
stop_reason:
type: string
enum:
- end_of_turn
- end_of_message
- out_of_tokens
description: Reason why generation stopped
logprobs:
type: array
items:
$ref: '#/components/schemas/TokenLogProbs'
description: >-
Optional log probabilities for generated tokens
additionalProperties: false
required:
- content
- stop_reason
title: CompletionResponse
description: Response from a completion request.
CompletionResponseStreamChunk:
type: object
properties:
metrics:
type: array
items:
$ref: '#/components/schemas/MetricInResponse'
description: >-
(Optional) List of metrics associated with the API response
delta:
type: string
description: >-
New content generated since last chunk. This can be one or more tokens.
stop_reason:
type: string
enum:
- end_of_turn
- end_of_message
- out_of_tokens
description: >-
Optional reason why generation stopped, if complete
logprobs:
type: array
items:
$ref: '#/components/schemas/TokenLogProbs'
description: >-
Optional log probabilities for generated tokens
additionalProperties: false
required:
- delta
title: CompletionResponseStreamChunk
description: >-
A chunk of a streamed completion response.
AgentConfig:
type: object
properties:

View file

@@ -1008,7 +1008,6 @@ class InferenceProvider(Protocol):
model_store: ModelStore | None = None
@webmethod(route="/inference/completion", method="POST", level=LLAMA_STACK_API_V1)
async def completion(
self,
model_id: str,

View file

@@ -224,10 +224,6 @@ class TelemetryAdapter(TelemetryDatasetMixin, Telemetry):
return _GLOBAL_STORAGE["gauges"][name]
def _log_metric(self, event: MetricEvent) -> None:
# Always log to console if console sink is enabled (debug)
if TelemetrySink.CONSOLE in self.config.sinks:
logger.debug(f"METRIC: {event.metric}={event.value} {event.unit} {event.attributes}")
# Add metric as an event to the current span
try:
with self._lock:

View file

@@ -61,6 +61,7 @@ logger = get_logger(name=__name__, category="inference::fireworks")
class FireworksInferenceAdapter(OpenAIMixin, ModelRegistryHelper, Inference, NeedsRequestProviderData):
embedding_model_metadata = {
"nomic-ai/nomic-embed-text-v1.5": {"embedding_dimension": 768, "context_length": 8192},
"accounts/fireworks/models/qwen3-embedding-8b": {"embedding_dimension": 4096, "context_length": 40960},
}
def __init__(self, config: FireworksImplConfig) -> None:
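
The added metadata entry registers Fireworks' qwen3-embedding-8b with a 4096-dimensional embedding and a 40960-token context. A minimal sketch of exercising that model through the OpenAI-compatible embeddings endpoint, matching the recordings added later in this commit; the client setup and API key are assumptions:

from openai import OpenAI

client = OpenAI(
    base_url="https://api.fireworks.ai/inference/v1",  # base URL seen in the recordings below
    api_key="YOUR_FIREWORKS_API_KEY",  # assumption: replace with a real key
)

resp = client.embeddings.create(
    model="accounts/fireworks/models/qwen3-embedding-8b",
    input=["hello world"],
    encoding_format="float",
)
# Expected to match embedding_dimension=4096 from the metadata above.
assert len(resp.data[0].embedding) == 4096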

View file

@@ -192,6 +192,14 @@ async def localize_image_content(uri: str) -> tuple[bytes, str] | None:
format = "png"
return content, format
elif uri.startswith("data"):
# data:image/{format};base64,{data}
match = re.match(r"data:image/(\w+);base64,(.+)", uri)
if not match:
raise ValueError(f"Invalid data URL format, {uri[:40]}...")
fmt, image_data = match.groups()
content = base64.b64decode(image_data)
return content, fmt
else:
return None
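
The new branch decodes base64 data URLs of the form data:image/{format};base64,{data}. A self-contained sketch of the same parsing logic (the helper name is illustrative, not part of the diff):

import base64
import re

def parse_image_data_url(uri: str) -> tuple[bytes, str]:
    # data:image/{format};base64,{data}
    match = re.match(r"data:image/(\w+);base64,(.+)", uri)
    if not match:
        raise ValueError(f"Invalid data URL format, {uri[:40]}...")
    fmt, image_data = match.groups()
    return base64.b64decode(image_data), fmt

# Example: round-trip a tiny payload through a data URL.
uri = "data:image/png;base64," + base64.b64encode(b"\x89PNG").decode()
content, fmt = parse_image_data_url(uri)
assert fmt == "png" and content.startswith(b"\x89PNG")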

View file

@@ -1,303 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
#
# Test plan:
#
# Types of input:
# - array of strings
# - array of images (ImageContentItem, either URL or base64 string)
# - array of texts (TextContentItem)
# Types of output:
# - list of list of floats
# Params:
# - text_truncation
# - absent w/ long text -> error
# - none w/ long text -> error
# - absent w/ short text -> ok
# - none w/ short text -> ok
# - end w/ long text -> ok
# - end w/ short text -> ok
# - start w/ long text -> ok
# - start w/ short text -> ok
# - output_dimension
# - response dimension matches
# - task_type, only for asymmetric models
# - query embedding != passage embedding
# Negative:
# - long string
# - long text
#
# Todo:
# - negative tests
# - empty
# - empty list
# - empty string
# - empty text
# - empty image
# - long
# - large image
# - appropriate combinations
# - batch size
# - many inputs
# - invalid
# - invalid URL
# - invalid base64
#
# Notes:
# - use llama_stack_client fixture
# - use pytest.mark.parametrize when possible
# - no accuracy tests: only check the type of output, not the content
#
import pytest
from llama_stack_client import BadRequestError as LlamaStackBadRequestError
from llama_stack_client.types import EmbeddingsResponse
from llama_stack_client.types.shared.interleaved_content import (
ImageContentItem,
ImageContentItemImage,
ImageContentItemImageURL,
TextContentItem,
)
from openai import BadRequestError as OpenAIBadRequestError
from llama_stack.core.library_client import LlamaStackAsLibraryClient
DUMMY_STRING = "hello"
DUMMY_STRING2 = "world"
DUMMY_LONG_STRING = "NVDA " * 10240
DUMMY_TEXT = TextContentItem(text=DUMMY_STRING, type="text")
DUMMY_TEXT2 = TextContentItem(text=DUMMY_STRING2, type="text")
DUMMY_LONG_TEXT = TextContentItem(text=DUMMY_LONG_STRING, type="text")
# TODO(mf): add a real image URL and base64 string
DUMMY_IMAGE_URL = ImageContentItem(
image=ImageContentItemImage(url=ImageContentItemImageURL(uri="https://example.com/image.jpg")), type="image"
)
DUMMY_IMAGE_BASE64 = ImageContentItem(image=ImageContentItemImage(data="base64string"), type="image")
SUPPORTED_PROVIDERS = {"remote::nvidia"}
MODELS_SUPPORTING_MEDIA = {}
MODELS_SUPPORTING_OUTPUT_DIMENSION = {"nvidia/llama-3.2-nv-embedqa-1b-v2"}
MODELS_REQUIRING_TASK_TYPE = {
"nvidia/llama-3.2-nv-embedqa-1b-v2",
"nvidia/nv-embedqa-e5-v5",
"nvidia/nv-embedqa-mistral-7b-v2",
"snowflake/arctic-embed-l",
}
MODELS_SUPPORTING_TASK_TYPE = MODELS_REQUIRING_TASK_TYPE
def default_task_type(model_id):
"""
Some models require a task type parameter. This provides a default value for
testing those models.
"""
if model_id in MODELS_REQUIRING_TASK_TYPE:
return {"task_type": "query"}
return {}
@pytest.mark.parametrize(
"contents",
[
[DUMMY_STRING, DUMMY_STRING2],
[DUMMY_TEXT, DUMMY_TEXT2],
],
ids=[
"list[string]",
"list[text]",
],
)
def test_embedding_text(llama_stack_client, embedding_model_id, contents, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id)
)
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
assert isinstance(response.embeddings[0], list)
assert isinstance(response.embeddings[0][0], float)
@pytest.mark.parametrize(
"contents",
[
[DUMMY_IMAGE_URL, DUMMY_IMAGE_BASE64],
[DUMMY_IMAGE_URL, DUMMY_STRING, DUMMY_IMAGE_BASE64, DUMMY_TEXT],
],
ids=[
"list[url,base64]",
"list[url,string,base64,text]",
],
)
def test_embedding_image(llama_stack_client, embedding_model_id, contents, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
if embedding_model_id not in MODELS_SUPPORTING_MEDIA:
pytest.xfail(f"{embedding_model_id} doesn't support media")
response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=contents, **default_task_type(embedding_model_id)
)
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == sum(len(content) if isinstance(content, list) else 1 for content in contents)
assert isinstance(response.embeddings[0], list)
assert isinstance(response.embeddings[0][0], float)
@pytest.mark.parametrize(
"text_truncation",
[
"end",
"start",
],
)
@pytest.mark.parametrize(
"contents",
[
[DUMMY_LONG_TEXT],
[DUMMY_STRING],
],
ids=[
"long",
"short",
],
)
def test_embedding_truncation(
llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type
):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=contents,
text_truncation=text_truncation,
**default_task_type(embedding_model_id),
)
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == 1
assert isinstance(response.embeddings[0], list)
assert isinstance(response.embeddings[0][0], float)
@pytest.mark.parametrize(
"text_truncation",
[
None,
"none",
],
)
@pytest.mark.parametrize(
"contents",
[
[DUMMY_LONG_TEXT],
[DUMMY_LONG_STRING],
],
ids=[
"long-text",
"long-str",
],
)
def test_embedding_truncation_error(
llama_stack_client, embedding_model_id, text_truncation, contents, inference_provider_type
):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
# Using LlamaStackClient from llama_stack_client will raise llama_stack_client.BadRequestError,
# while using LlamaStackAsLibraryClient from llama_stack.core.library_client will raise whatever error the backend raises
error_type = (
OpenAIBadRequestError
if isinstance(llama_stack_client, LlamaStackAsLibraryClient)
else LlamaStackBadRequestError
)
with pytest.raises(error_type):
llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=[DUMMY_LONG_TEXT],
text_truncation=text_truncation,
**default_task_type(embedding_model_id),
)
def test_embedding_output_dimension(llama_stack_client, embedding_model_id, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
if embedding_model_id not in MODELS_SUPPORTING_OUTPUT_DIMENSION:
pytest.xfail(f"{embedding_model_id} doesn't support output_dimension")
base_response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=[DUMMY_STRING], **default_task_type(embedding_model_id)
)
test_response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=[DUMMY_STRING],
**default_task_type(embedding_model_id),
output_dimension=32,
)
assert len(base_response.embeddings[0]) != len(test_response.embeddings[0])
assert len(test_response.embeddings[0]) == 32
def test_embedding_task_type(llama_stack_client, embedding_model_id, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
if embedding_model_id not in MODELS_SUPPORTING_TASK_TYPE:
pytest.xfail(f"{embedding_model_id} doesn't support task_type")
query_embedding = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="query"
)
document_embedding = llama_stack_client.inference.embeddings(
model_id=embedding_model_id, contents=[DUMMY_STRING], task_type="document"
)
assert query_embedding.embeddings != document_embedding.embeddings
@pytest.mark.parametrize(
"text_truncation",
[
None,
"none",
"end",
"start",
],
)
def test_embedding_text_truncation(llama_stack_client, embedding_model_id, text_truncation, inference_provider_type):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
response = llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=[DUMMY_STRING],
text_truncation=text_truncation,
**default_task_type(embedding_model_id),
)
assert isinstance(response, EmbeddingsResponse)
assert len(response.embeddings) == 1
assert isinstance(response.embeddings[0], list)
assert isinstance(response.embeddings[0][0], float)
@pytest.mark.parametrize(
"text_truncation",
[
"NONE",
"END",
"START",
"left",
"right",
],
)
def test_embedding_text_truncation_error(
llama_stack_client, embedding_model_id, text_truncation, inference_provider_type
):
if inference_provider_type not in SUPPORTED_PROVIDERS:
pytest.xfail(f"{inference_provider_type} doesn't support embedding model yet")
error_type = ValueError if isinstance(llama_stack_client, LlamaStackAsLibraryClient) else LlamaStackBadRequestError
with pytest.raises(error_type):
llama_stack_client.inference.embeddings(
model_id=embedding_model_id,
contents=[DUMMY_STRING],
text_truncation=text_truncation,
**default_task_type(embedding_model_id),
)

View file

@@ -9,6 +9,7 @@ import time
import unicodedata
import pytest
from pydantic import BaseModel
from ..test_cases.test_case import TestCase
@@ -62,6 +63,14 @@ def skip_if_model_doesnt_support_openai_completion(client_with_models, model_id)
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support OpenAI completions.")
def skip_if_doesnt_support_completions_logprobs(client_with_models, model_id):
provider_type = provider_from_model(client_with_models, model_id).provider_type
if provider_type in (
"remote::ollama", # logprobs is ignored
):
pytest.skip(f"Model {model_id} hosted by {provider_type} doesn't support /v1/completions logprobs.")
def skip_if_model_doesnt_support_suffix(client_with_models, model_id):
# To test `fim` (fill-in-the-middle) completion, we need a model that supports the `suffix` parameter.
# Use this to specifically test that API functionality.
@@ -205,28 +214,6 @@ def test_openai_completion_streaming(llama_stack_client, client_with_models, tex
assert len(content_str) > 10
@pytest.mark.parametrize(
"prompt_logprobs",
[
1,
0,
],
)
def test_openai_completion_prompt_logprobs(llama_stack_client, client_with_models, text_model_id, prompt_logprobs):
skip_if_provider_isnt_vllm(client_with_models, text_model_id)
prompt = "Hello, world!"
response = llama_stack_client.completions.create(
model=text_model_id,
prompt=prompt,
stream=False,
prompt_logprobs=prompt_logprobs,
)
assert len(response.choices) > 0
choice = response.choices[0]
assert len(choice.prompt_logprobs) > 0
def test_openai_completion_guided_choice(llama_stack_client, client_with_models, text_model_id):
skip_if_provider_isnt_vllm(client_with_models, text_model_id)
@@ -518,3 +505,214 @@ def test_openai_chat_completion_non_streaming_with_file(openai_client, client_wi
message_content = response.choices[0].message.content.lower().strip()
normalized_content = _normalize_text(message_content)
assert "hello world" in normalized_content
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:stop_sequence",
],
)
def test_openai_completion_stop_sequence(client_with_models, openai_client, text_model_id, test_case):
skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
tc = TestCase(test_case)
response = openai_client.completions.create(
model=text_model_id,
prompt=tc["content"],
stop="1963",
stream=False,
)
assert len(response.choices) > 0
choice = response.choices[0]
assert "1963" not in choice.text
response = openai_client.completions.create(
model=text_model_id,
prompt=tc["content"],
stop=["blathering", "1963"],
stream=False,
)
assert len(response.choices) > 0
choice = response.choices[0]
assert "1963" not in choice.text
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:log_probs",
],
)
def test_openai_completion_logprobs(client_with_models, openai_client, text_model_id, test_case):
skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
skip_if_doesnt_support_completions_logprobs(client_with_models, text_model_id)
tc = TestCase(test_case)
response = openai_client.completions.create(
model=text_model_id,
prompt=tc["content"],
logprobs=5,
)
assert len(response.choices) > 0
choice = response.choices[0]
assert choice.text, "Response text should not be empty"
assert choice.logprobs, "Logprobs should not be empty"
logprobs = choice.logprobs
assert logprobs.token_logprobs, "Response tokens should not be empty"
assert len(logprobs.tokens) == len(logprobs.token_logprobs)
assert len(logprobs.token_logprobs) == len(logprobs.top_logprobs)
for i, (token, prob) in enumerate(zip(logprobs.tokens, logprobs.token_logprobs, strict=True)):
assert logprobs.top_logprobs[i][token] == prob
assert len(logprobs.top_logprobs[i]) == 5
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:log_probs",
],
)
def test_openai_completion_logprobs_streaming(client_with_models, openai_client, text_model_id, test_case):
skip_if_model_doesnt_support_openai_completion(client_with_models, text_model_id)
skip_if_doesnt_support_completions_logprobs(client_with_models, text_model_id)
tc = TestCase(test_case)
response = openai_client.completions.create(
model=text_model_id,
prompt=tc["content"],
logprobs=3,
stream=True,
max_tokens=5,
)
for chunk in response:
choice = chunk.choices[0]
if choice.text: # if there's a token, we expect logprobs
assert choice.logprobs, "Logprobs should not be empty"
logprobs = choice.logprobs
assert logprobs.token_logprobs, "Response tokens should not be empty"
assert len(logprobs.tokens) == len(logprobs.token_logprobs)
assert len(logprobs.token_logprobs) == len(logprobs.top_logprobs)
for i, (token, prob) in enumerate(zip(logprobs.tokens, logprobs.token_logprobs, strict=True)):
assert logprobs.top_logprobs[i][token] == prob
assert len(logprobs.top_logprobs[i]) == 3
else: # no token, no logprobs
assert not choice.logprobs, "Logprobs should be empty"
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_openai_chat_completion_with_tools(openai_client, text_model_id, test_case):
tc = TestCase(test_case)
response = openai_client.chat.completions.create(
model=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="auto",
stream=False,
)
assert len(response.choices) == 1
assert len(response.choices[0].message.tool_calls) == 1
tool_call = response.choices[0].message.tool_calls[0]
assert tool_call.function.name == tc["tools"][0]["function"]["name"]
assert "location" in tool_call.function.arguments
assert tc["expected"]["location"] in tool_call.function.arguments
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_openai_chat_completion_with_tools_and_streaming(openai_client, text_model_id, test_case):
tc = TestCase(test_case)
response = openai_client.chat.completions.create(
model=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="auto",
stream=True,
)
# Accumulate tool calls from streaming chunks
tool_calls = []
for chunk in response:
if chunk.choices and chunk.choices[0].delta.tool_calls:
for i, tc_delta in enumerate(chunk.choices[0].delta.tool_calls):
while len(tool_calls) <= i:
tool_calls.append({"function": {"name": "", "arguments": ""}})
if tc_delta.function and tc_delta.function.name:
tool_calls[i]["function"]["name"] = tc_delta.function.name
if tc_delta.function and tc_delta.function.arguments:
tool_calls[i]["function"]["arguments"] += tc_delta.function.arguments
assert len(tool_calls) == 1
tool_call = tool_calls[0]
assert tool_call["function"]["name"] == tc["tools"][0]["function"]["name"]
assert "location" in tool_call["function"]["arguments"]
assert tc["expected"]["location"] in tool_call["function"]["arguments"]
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_openai_chat_completion_with_tool_choice_none(openai_client, text_model_id, test_case):
tc = TestCase(test_case)
response = openai_client.chat.completions.create(
model=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="none",
stream=False,
)
assert len(response.choices) == 1
tool_calls = response.choices[0].message.tool_calls
assert tool_calls is None or len(tool_calls) == 0
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:structured_output",
],
)
def test_openai_chat_completion_structured_output(openai_client, text_model_id, test_case):
# Note: Skip condition may need adjustment for OpenAI client
class AnswerFormat(BaseModel):
first_name: str
last_name: str
year_of_birth: int
tc = TestCase(test_case)
response = openai_client.chat.completions.create(
model=text_model_id,
messages=tc["messages"],
response_format={
"type": "json_schema",
"json_schema": {
"name": "AnswerFormat",
"schema": AnswerFormat.model_json_schema(),
},
},
stream=False,
)
print(response.choices[0].message.content)
answer = AnswerFormat.model_validate_json(response.choices[0].message.content)
expected = tc["expected"]
assert answer.first_name == expected["first_name"]
assert answer.last_name == expected["last_name"]
assert answer.year_of_birth == expected["year_of_birth"]

View file

@@ -1,545 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from time import sleep
import pytest
from pydantic import BaseModel
from llama_stack.models.llama.sku_list import resolve_model
from ..test_cases.test_case import TestCase
PROVIDER_LOGPROBS_TOP_K = {"remote::together", "remote::fireworks", "remote::vllm"}
def skip_if_model_doesnt_support_completion(client_with_models, model_id):
models = {m.identifier: m for m in client_with_models.models.list()}
models.update({m.provider_resource_id: m for m in client_with_models.models.list()})
provider_id = models[model_id].provider_id
providers = {p.provider_id: p for p in client_with_models.providers.list()}
provider = providers[provider_id]
if (
provider.provider_type
in (
"remote::openai",
"remote::anthropic",
"remote::gemini",
"remote::vertexai",
"remote::groq",
"remote::sambanova",
"remote::azure",
)
or "openai-compat" in provider.provider_type
):
pytest.skip(f"Model {model_id} hosted by {provider.provider_type} doesn't support completion")
def skip_if_model_doesnt_support_json_schema_structured_output(client_with_models, model_id):
models = {m.identifier: m for m in client_with_models.models.list()}
models.update({m.provider_resource_id: m for m in client_with_models.models.list()})
provider_id = models[model_id].provider_id
providers = {p.provider_id: p for p in client_with_models.providers.list()}
provider = providers[provider_id]
if provider.provider_type in ("remote::sambanova", "remote::azure", "remote::watsonx"):
pytest.skip(
f"Model {model_id} hosted by {provider.provider_type} doesn't support json_schema structured output"
)
def get_llama_model(client_with_models, model_id):
models = {}
for m in client_with_models.models.list():
models[m.identifier] = m
models[m.provider_resource_id] = m
assert model_id in models, f"Model {model_id} not found"
model = models[model_id]
ids = (model.identifier, model.provider_resource_id)
for mid in ids:
if resolve_model(mid):
return mid
return model.metadata.get("llama_model", None)
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:sanity",
],
)
def test_text_completion_non_streaming(client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=False,
model_id=text_model_id,
sampling_params={
"max_tokens": 50,
},
)
assert len(response.content) > 10
# assert "blue" in response.content.lower().strip()
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:sanity",
],
)
def test_text_completion_streaming(client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=True,
model_id=text_model_id,
sampling_params={
"max_tokens": 50,
},
)
streamed_content = [chunk.delta for chunk in response]
content_str = "".join(streamed_content).lower().strip()
# assert "blue" in content_str
assert len(content_str) > 10
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:stop_sequence",
],
)
def test_text_completion_stop_sequence(client_with_models, text_model_id, inference_provider_type, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
# This is only supported/tested for remote vLLM: https://github.com/meta-llama/llama-stack/issues/1771
if inference_provider_type != "remote::vllm":
pytest.xfail(f"{inference_provider_type} doesn't support 'stop' parameter yet")
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=True,
model_id=text_model_id,
sampling_params={
"max_tokens": 50,
"stop": ["1963"],
},
)
streamed_content = [chunk.delta for chunk in response]
content_str = "".join(streamed_content).lower().strip()
assert "1963" not in content_str
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:log_probs",
],
)
def test_text_completion_log_probs_non_streaming(client_with_models, text_model_id, inference_provider_type, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
if inference_provider_type not in PROVIDER_LOGPROBS_TOP_K:
pytest.xfail(f"{inference_provider_type} doesn't support log probs yet")
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=False,
model_id=text_model_id,
sampling_params={
"max_tokens": 5,
},
logprobs={
"top_k": 1,
},
)
assert response.logprobs, "Logprobs should not be empty"
assert 1 <= len(response.logprobs) <= 5 # each token has 1 logprob and here max_tokens=5
assert all(len(logprob.logprobs_by_token) == 1 for logprob in response.logprobs)
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:log_probs",
],
)
def test_text_completion_log_probs_streaming(client_with_models, text_model_id, inference_provider_type, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
if inference_provider_type not in PROVIDER_LOGPROBS_TOP_K:
pytest.xfail(f"{inference_provider_type} doesn't support log probs yet")
tc = TestCase(test_case)
response = client_with_models.inference.completion(
content=tc["content"],
stream=True,
model_id=text_model_id,
sampling_params={
"max_tokens": 5,
},
logprobs={
"top_k": 1,
},
)
streamed_content = list(response)
for chunk in streamed_content:
if chunk.delta: # if there's a token, we expect logprobs
assert chunk.logprobs, "Logprobs should not be empty"
assert all(len(logprob.logprobs_by_token) == 1 for logprob in chunk.logprobs)
else: # no token, no logprobs
assert not chunk.logprobs, "Logprobs should be empty"
@pytest.mark.parametrize(
"test_case",
[
"inference:completion:structured_output",
],
)
def test_text_completion_structured_output(client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_completion(client_with_models, text_model_id)
skip_if_model_doesnt_support_json_schema_structured_output(client_with_models, text_model_id)
class AnswerFormat(BaseModel):
name: str
year_born: str
year_retired: str
tc = TestCase(test_case)
user_input = tc["user_input"]
response = client_with_models.inference.completion(
model_id=text_model_id,
content=user_input,
stream=False,
sampling_params={
"max_tokens": 50,
},
response_format={
"type": "json_schema",
"json_schema": AnswerFormat.model_json_schema(),
},
)
answer = AnswerFormat.model_validate_json(response.content)
expected = tc["expected"]
assert answer.name == expected["name"]
assert answer.year_born == expected["year_born"]
assert answer.year_retired == expected["year_retired"]
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:non_streaming_01",
"inference:chat_completion:non_streaming_02",
],
)
def test_text_chat_completion_non_streaming(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
question = tc["question"]
expected = tc["expected"]
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=[
{
"role": "user",
"content": question,
}
],
stream=False,
)
message_content = response.completion_message.content.lower().strip()
assert len(message_content) > 0
assert expected.lower() in message_content
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:streaming_01",
"inference:chat_completion:streaming_02",
],
)
def test_text_chat_completion_streaming(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
question = tc["question"]
expected = tc["expected"]
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=[{"role": "user", "content": question}],
stream=True,
timeout=120, # Increase timeout to 2 minutes for large conversation history
)
streamed_content = [str(chunk.event.delta.text.lower().strip()) for chunk in response]
assert len(streamed_content) > 0
assert expected.lower() in "".join(streamed_content)
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_text_chat_completion_with_tool_calling_and_non_streaming(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="auto",
stream=False,
)
# some models can return content for the response in addition to the tool call
assert response.completion_message.role == "assistant"
assert len(response.completion_message.tool_calls) == 1
assert response.completion_message.tool_calls[0].tool_name == tc["tools"][0]["tool_name"]
assert response.completion_message.tool_calls[0].arguments == tc["expected"]
# Extracts streamed text and separates it from the tool invocation content.
# The returned tool invocation content will be a string, so it's easy to compare with the expected value,
# e.g. "[get_weather, {'location': 'San Francisco, CA'}]"
def extract_tool_invocation_content(response):
tool_invocation_content: str = ""
for chunk in response:
delta = chunk.event.delta
if delta.type == "tool_call" and delta.parse_status == "succeeded":
call = delta.tool_call
tool_invocation_content += f"[{call.tool_name}, {call.arguments}]"
return tool_invocation_content
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_text_chat_completion_with_tool_calling_and_streaming(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_choice="auto",
stream=True,
)
tool_invocation_content = extract_tool_invocation_content(response)
expected_tool_name = tc["tools"][0]["tool_name"]
expected_argument = tc["expected"]
assert tool_invocation_content == f"[{expected_tool_name}, {expected_argument}]"
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_text_chat_completion_with_tool_choice_required(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_config={
"tool_choice": "required",
},
stream=True,
)
tool_invocation_content = extract_tool_invocation_content(response)
expected_tool_name = tc["tools"][0]["tool_name"]
expected_argument = tc["expected"]
assert tool_invocation_content == f"[{expected_tool_name}, {expected_argument}]"
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling",
],
)
def test_text_chat_completion_with_tool_choice_none(client_with_models, text_model_id, test_case):
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
tools=tc["tools"],
tool_config={"tool_choice": "none"},
stream=True,
)
tool_invocation_content = extract_tool_invocation_content(response)
assert tool_invocation_content == ""
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:structured_output",
],
)
def test_text_chat_completion_structured_output(client_with_models, text_model_id, test_case):
skip_if_model_doesnt_support_json_schema_structured_output(client_with_models, text_model_id)
class NBAStats(BaseModel):
year_for_draft: int
num_seasons_in_nba: int
class AnswerFormat(BaseModel):
first_name: str
last_name: str
year_of_birth: int
nba_stats: NBAStats
tc = TestCase(test_case)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=tc["messages"],
response_format={
"type": "json_schema",
"json_schema": AnswerFormat.model_json_schema(),
},
stream=False,
)
answer = AnswerFormat.model_validate_json(response.completion_message.content)
expected = tc["expected"]
assert answer.first_name == expected["first_name"]
assert answer.last_name == expected["last_name"]
assert answer.year_of_birth == expected["year_of_birth"]
assert answer.nba_stats.num_seasons_in_nba == expected["num_seasons_in_nba"]
assert answer.nba_stats.year_for_draft == expected["year_for_draft"]
@pytest.mark.parametrize("streaming", [True, False])
@pytest.mark.parametrize(
"test_case",
[
"inference:chat_completion:tool_calling_tools_absent",
],
)
def test_text_chat_completion_tool_calling_tools_not_in_request(
client_with_models, text_model_id, test_case, streaming
):
tc = TestCase(test_case)
# TODO: more dynamic lookup on tool_prompt_format for model family
tool_prompt_format = "json" if "3.1" in text_model_id else "python_list"
request = {
"model_id": text_model_id,
"messages": tc["messages"],
"tools": tc["tools"],
"tool_choice": "auto",
"tool_prompt_format": tool_prompt_format,
"stream": streaming,
}
response = client_with_models.inference.chat_completion(**request)
if streaming:
for chunk in response:
delta = chunk.event.delta
if delta.type == "tool_call" and delta.parse_status == "succeeded":
assert delta.tool_call.tool_name == "get_object_namespace_list"
if delta.type == "tool_call" and delta.parse_status == "failed":
# expect raw message that failed to parse in tool_call
assert isinstance(delta.tool_call, str)
assert len(delta.tool_call) > 0
else:
for tc in response.completion_message.tool_calls:
assert tc.tool_name == "get_object_namespace_list"
@pytest.mark.parametrize(
"test_case",
[
# Tests if the model can handle simple messages like "Hi" or
# a message unrelated to one of the tool calls
"inference:chat_completion:text_then_tool",
# Tests if the model can do full tool call with responses correctly
"inference:chat_completion:tool_then_answer",
# Tests if model can generate multiple params and
# read outputs correctly
"inference:chat_completion:array_parameter",
],
)
def test_text_chat_completion_with_multi_turn_tool_calling(client_with_models, text_model_id, test_case):
"""This test tests the model's tool calling loop in various scenarios"""
if "llama-4" not in text_model_id.lower() and "llama4" not in text_model_id.lower():
pytest.xfail("Not tested for non-llama4 models yet")
tc = TestCase(test_case)
messages = []
# keep going until either
# 1. there are still messages left to test in multi-turn, or
# 2. no messages are left but the last message is a tool response
while len(tc["messages"]) > 0 or (len(messages) > 0 and messages[-1]["role"] == "tool"):
# do not take new messages if last message is tool response
if len(messages) == 0 or messages[-1]["role"] != "tool":
new_messages = tc["messages"].pop(0)
messages += new_messages
# pprint(messages)
response = client_with_models.inference.chat_completion(
model_id=text_model_id,
messages=messages,
tools=tc["tools"],
stream=False,
sampling_params={
"strategy": {
"type": "top_p",
"top_p": 0.9,
"temperature": 0.6,
}
},
)
op_msg = response.completion_message
messages.append(op_msg.model_dump())
# print(op_msg)
assert op_msg.role == "assistant"
expected = tc["expected"].pop(0)
assert len(op_msg.tool_calls) == expected["num_tool_calls"]
if expected["num_tool_calls"] > 0:
assert op_msg.tool_calls[0].tool_name == expected["tool_name"]
assert op_msg.tool_calls[0].arguments == expected["tool_arguments"]
tool_response = tc["tool_responses"].pop(0)
messages.append(
# Tool Response Message
{
"role": "tool",
"call_id": op_msg.tool_calls[0].call_id,
"content": tool_response["response"],
}
)
else:
actual_answer = op_msg.content.lower()
# pprint(actual_answer)
assert expected["answer"] in actual_answer
# sleep to avoid rate limit
sleep(1)

View file

@@ -25,16 +25,19 @@ def base64_image_data(image_path):
return base64.b64encode(image_path.read_bytes()).decode("utf-8")
@pytest.fixture
def base64_image_url(base64_image_data):
return f"data:image/png;base64,{base64_image_data}"
def test_image_chat_completion_non_streaming(client_with_models, vision_model_id):
message = {
"role": "user",
"content": [
{
"type": "image",
"image": {
"url": {
"uri": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
},
"type": "image_url",
"image_url": {
"url": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
},
},
{
@@ -43,12 +46,12 @@ def test_image_chat_completion_non_streaming(client_with_models, vision_model_id
},
],
}
response = client_with_models.inference.chat_completion(
model_id=vision_model_id,
response = client_with_models.chat.completions.create(
model=vision_model_id,
messages=[message],
stream=False,
)
message_content = response.completion_message.content.lower().strip()
message_content = response.choices[0].message.content.lower().strip()
assert len(message_content) > 0
assert any(expected in message_content for expected in {"dog", "puppy", "pup"})
@@ -68,8 +71,13 @@ def multi_image_data():
return encoded_files
@pytest.fixture
def multi_image_url(multi_image_data):
return [f"data:image/jpeg;base64,{data}" for data in multi_image_data]
@pytest.mark.parametrize("stream", [True, False])
def test_image_chat_completion_multiple_images(client_with_models, vision_model_id, multi_image_data, stream):
def test_image_chat_completion_multiple_images(client_with_models, vision_model_id, multi_image_url, stream):
supported_models = ["llama-4", "gpt-4o", "llama4"]
if not any(model in vision_model_id.lower() for model in supported_models):
pytest.skip(
@@ -81,15 +89,15 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
"role": "user",
"content": [
{
"type": "image",
"image": {
"data": multi_image_data[0],
"type": "image_url",
"image_url": {
"url": multi_image_url[0],
},
},
{
"type": "image",
"image": {
"data": multi_image_data[1],
"type": "image_url",
"image_url": {
"url": multi_image_url[1],
},
},
{
@@ -99,17 +107,17 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
],
},
]
response = client_with_models.inference.chat_completion(
model_id=vision_model_id,
response = client_with_models.chat.completions.create(
model=vision_model_id,
messages=messages,
stream=stream,
)
if stream:
message_content = ""
for chunk in response:
message_content += chunk.event.delta.text
message_content += chunk.choices[0].delta.content
else:
message_content = response.completion_message.content
message_content = response.choices[0].message.content
assert len(message_content) > 0
assert any(expected in message_content.lower().strip() for expected in {"bedroom"}), message_content
@@ -125,17 +133,17 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
"role": "user",
"content": [
{
"type": "image",
"image": {
"data": multi_image_data[2],
"type": "image_url",
"image_url": {
"url": multi_image_data[2],
},
},
{"type": "text", "text": "How about this one?"},
],
},
)
response = client_with_models.inference.chat_completion(
model_id=vision_model_id,
response = client_with_models.chat.completions.create(
model=vision_model_id,
messages=messages,
stream=stream,
)
@@ -144,7 +152,7 @@ def test_image_chat_completion_multiple_images(client_with_models, vision_model_
for chunk in response:
message_content += chunk.event.delta.text
else:
message_content = response.completion_message.content
message_content = response.choices[0].message.content
assert len(message_content) > 0
assert any(expected in message_content.lower().strip() for expected in {"sword", "shield"}), message_content
@@ -154,11 +162,9 @@ def test_image_chat_completion_streaming(client_with_models, vision_model_id):
"role": "user",
"content": [
{
"type": "image",
"image": {
"url": {
"uri": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
},
"type": "image_url",
"image_url": {
"url": "https://raw.githubusercontent.com/meta-llama/llama-stack/main/tests/integration/inference/dog.png"
},
},
{
@@ -167,23 +173,23 @@ def test_image_chat_completion_streaming(client_with_models, vision_model_id):
},
],
}
response = client_with_models.inference.chat_completion(
model_id=vision_model_id,
response = client_with_models.chat.completions.create(
model=vision_model_id,
messages=[message],
stream=True,
)
streamed_content = ""
for chunk in response:
streamed_content += chunk.event.delta.text.lower()
streamed_content += chunk.choices[0].delta.content.lower()
assert len(streamed_content) > 0
assert any(expected in streamed_content for expected in {"dog", "puppy", "pup"})
def test_image_chat_completion_base64(client_with_models, vision_model_id, base64_image_data):
def test_image_chat_completion_base64(client_with_models, vision_model_id, base64_image_url):
image_spec = {
"type": "image",
"image": {
"data": base64_image_data,
"type": "image_url",
"image_url": {
"url": base64_image_url,
},
}
@@ -197,10 +203,10 @@ def test_image_chat_completion_base64(client_with_models, vision_model_id, base6
},
],
}
response = client_with_models.inference.chat_completion(
model_id=vision_model_id,
response = client_with_models.chat.completions.create(
model=vision_model_id,
messages=[message],
stream=False,
)
message_content = response.completion_message.content.lower().strip()
message_content = response.choices[0].message.content.lower().strip()
assert len(message_content) > 0
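
These hunks migrate the vision tests from the legacy inference.chat_completion content items ("type": "image") to the OpenAI chat.completions message shape ("type": "image_url"). A minimal sketch of the two image forms the updated tests use; the URL, base64 placeholder, and the commented call are illustrative assumptions:

message = {
    "role": "user",
    "content": [
        # Remote image referenced by URL
        {"type": "image_url", "image_url": {"url": "https://example.com/dog.png"}},
        # Inline image as a base64 data URL (see the base64_image_url fixture above)
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,<BASE64_DATA>"}},
        {"type": "text", "text": "What do these images show?"},
    ],
}
# response = client_with_models.chat.completions.create(
#     model=vision_model_id, messages=[message], stream=False
# )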

View file

@@ -0,0 +1,31 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
"headers": {},
"body": {
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"input": [],
"encoding_format": "float"
},
"endpoint": "/v1/embeddings",
"model": "accounts/fireworks/models/qwen3-embedding-8b"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [],
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"object": "list",
"usage": {
"prompt_tokens": 0,
"total_tokens": 0,
"completion_tokens": 0
},
"perf_metrics": null
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large

View file

@@ -0,0 +1,89 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."
},
{
"role": "user",
"content": "Please give me information about Michael Jordan."
}
],
"response_format": {
"type": "json_schema",
"json_schema": {
"name": "AnswerFormat",
"schema": {
"properties": {
"first_name": {
"title": "First Name",
"type": "string"
},
"last_name": {
"title": "Last Name",
"type": "string"
},
"year_of_birth": {
"title": "Year Of Birth",
"type": "integer"
}
},
"required": [
"first_name",
"last_name",
"year_of_birth"
],
"title": "AnswerFormat",
"type": "object"
}
}
},
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-433",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "{\"first_name\": \"Michael\", \"last_name\": \"Jordan\", \"year_of_birth\": 1963}\n\n \t\t\t\t\t\t\t\t\t\t\t \t\t ",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758979490,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 31,
"prompt_tokens": 60,
"total_tokens": 91,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}
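
The recording above captures a json_schema structured-output request against a local Ollama server. A sketch of the client-side call that produces such a request, mirroring test_openai_chat_completion_structured_output added earlier in this commit; openai_client is assumed to be a pre-configured OpenAI client pointed at the stack:

from pydantic import BaseModel

class AnswerFormat(BaseModel):
    first_name: str
    last_name: str
    year_of_birth: int

response = openai_client.chat.completions.create(  # assumed pre-configured client
    model="llama3.2:3b-instruct-fp16",
    messages=[
        {"role": "system", "content": "You are a helpful assistant. Michael Jordan was born in 1963. He played basketball for the Chicago Bulls for 15 seasons."},
        {"role": "user", "content": "Please give me information about Michael Jordan."},
    ],
    response_format={
        "type": "json_schema",
        "json_schema": {"name": "AnswerFormat", "schema": AnswerFormat.model_json_schema()},
    },
    stream=False,
)
answer = AnswerFormat.model_validate_json(response.choices[0].message.content)
assert answer.year_of_birth == 1963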

View file

@@ -0,0 +1,31 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
"headers": {},
"body": {
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"input": [],
"encoding_format": "base64"
},
"endpoint": "/v1/embeddings",
"model": "accounts/fireworks/models/qwen3-embedding-8b"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [],
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"object": "list",
"usage": {
"prompt_tokens": 0,
"total_tokens": 0,
"completion_tokens": 0
},
"perf_metrics": null
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,316 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "What's the name of the Sun in latin?"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": "The Latin",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " name",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " for",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " Sun",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": " \"",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": "Sol",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": "\".",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "3745da23-2db2-45a1-8ea5-2a09bbdb6a33",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1758920389,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 11,
"prompt_tokens": 20,
"total_tokens": 31,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}

View file

@@ -0,0 +1,44 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Hello, world!",
"logprobs": false,
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "cmpl-74",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Hello! How can I assist you today?"
}
],
"created": 1758975636,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 10,
"prompt_tokens": 29,
"total_tokens": 39,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}
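
The recording above can be reproduced with the stock openai Python client pointed at Ollama's OpenAI-compatible endpoint. A minimal sketch, assuming the server from the recorded URL (http://0.0.0.0:11434/v1) and that the API key is unused but must be non-empty:

from openai import OpenAI

# Assumed local Ollama endpoint from the recording; the key is a placeholder.
client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="ollama")

completion = client.completions.create(
    model="llama3.2:3b-instruct-fp16",
    prompt="Hello, world!",
    stream=False,
)

print(completion.choices[0].text)     # e.g. "Hello! How can I assist you today?"
print(completion.usage.total_tokens)  # mirrors the recorded "usage" block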

File diff suppressed because one or more lines are too long

View file

@@ -0,0 +1,92 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "Pretend you are a weather assistant."
},
{
"role": "user",
"content": "What's the weather like in San Francisco, CA?"
}
],
"stream": false,
"tool_choice": "auto",
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": [
"location"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-761",
"choices": [
{
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null,
"message": {
"content": "",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": [
{
"id": "call_cj8ownwc",
"function": {
"arguments": "{\"location\":\"San Francisco, CA\"}",
"name": "get_weather"
},
"type": "function",
"index": 0
}
]
}
}
],
"created": 1758975113,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 18,
"prompt_tokens": 185,
"total_tokens": 203,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}
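
The tool-calling request above maps directly onto the client's chat API; when the model elects to call the function, the arguments come back as a JSON string on message.tool_calls and finish_reason is "tool_calls". A sketch under the same assumed local setup:

from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="ollama")  # assumed local setup

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get the current weather",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "The city and state (both required), e.g. San Francisco, CA.",
                }
            },
            "required": ["location"],
        },
    },
}]

resp = client.chat.completions.create(
    model="llama3.2:3b-instruct-fp16",
    messages=[
        {"role": "system", "content": "Pretend you are a weather assistant."},
        {"role": "user", "content": "What's the weather like in San Francisco, CA?"},
    ],
    tools=tools,
    tool_choice="auto",
    stream=False,
)

# The model returns the call rather than prose; arguments arrive as a JSON string.
call = resp.choices[0].message.tool_calls[0]
print(call.function.name)       # "get_weather"
print(call.function.arguments)  # '{"location":"San Francisco, CA"}'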

View file

@@ -0,0 +1,44 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Hello, world!",
"logprobs": true,
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "cmpl-809",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Hello! It's nice to meet you. Is there anything I can help you with or would you like to chat?"
}
],
"created": 1758975633,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 25,
"prompt_tokens": 29,
"total_tokens": 54,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}
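
This recording passes logprobs as a boolean and the backend answers with "logprobs": null, so callers should not assume the field is populated (the upstream OpenAI completions API documents logprobs as an integer count of alternatives). A defensive sketch under the same assumed local setup:

from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="ollama")  # assumed local setup

completion = client.completions.create(
    model="llama3.2:3b-instruct-fp16",
    prompt="Hello, world!",
    logprobs=True,  # sent verbatim as in the recording
    stream=False,
)

choice = completion.choices[0]
# As the recording shows, this backend returns logprobs as null, so guard for it.
if choice.logprobs is not None:
    print(choice.logprobs.token_logprobs)
else:
    print("no logprobs returned:", choice.text)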

View file

@@ -0,0 +1,550 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "What is the name of the US captial?"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": "The name",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " the",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " US",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " capital",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " is",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " Washington",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": ",",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " D",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": ".C",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " (",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": "short",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " for",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " District",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " of",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": " Columbia",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": ").",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "f72b5be3-a677-4c38-b6ae-8c7e5cc4bf29",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1758920398,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 20,
"prompt_tokens": 20,
"total_tokens": 40,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}
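
The 550-line recording above is the chunk-by-chunk transcript of a single streaming call: each delta carries a token or two, and the final chunk pairs finish_reason "stop" with the usage block. A sketch that replays it, assuming the Fireworks key lives in FIREWORKS_API_KEY (the prompt is copied verbatim from the recording, typo included):

import os
from openai import OpenAI

client = OpenAI(
    base_url="https://api.fireworks.ai/inference/v1",
    api_key=os.environ["FIREWORKS_API_KEY"],  # env var name is an assumption
)

stream = client.chat.completions.create(
    model="accounts/fireworks/models/llama-v3p1-8b-instruct",
    messages=[{"role": "user", "content": "What is the name of the US captial?"}],
    stream=True,
)

# Rebuild the assistant message by concatenating the content deltas.
parts = []
for chunk in stream:
    delta = chunk.choices[0].delta
    if delta.content:
        parts.append(delta.content)
    if chunk.usage:  # in this recording, usage rides on the final "stop" chunk
        print("total tokens:", chunk.usage.total_tokens)
print("".join(parts))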

View file

@@ -0,0 +1,60 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "Pretend you are a weather assistant."
},
{
"role": "user",
"content": "What's the weather like in San Francisco, CA?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-123",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! As of my knowledge cutoff on December 15th, I have the latest information for you. However, please note that my data may not be entirely up-to-date.\n\nCurrently, and based on historical climate patterns, it appears to be a partly cloudy day with mild temperatures in San Francisco, CA. Expect a temperature range of around 48\u00b0F (9\u00b0C) to 54\u00b0F (12\u00b0C). It's likely to be a breezy day, with winds blowing at about 13 mph (21 km/h).\n\nHowever, if I were to look into more recent weather patterns or forecasts, I would recommend checking the latest conditions directly from reliable sources such as the National Weather Service or local news outlets for more accurate and up-to-date information.\n\nPlease let me know how I can further assist you.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758978071,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 163,
"prompt_tokens": 45,
"total_tokens": 208,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large

View file

@@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
"headers": {},
"body": {
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"input": "Test dimensions parameter",
"encoding_format": "float",
"dimensions": 16
},
"endpoint": "/v1/embeddings",
"model": "accounts/fireworks/models/qwen3-embedding-8b"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": [
-0.9296875,
5.1875,
-2.140625,
0.171875,
-2.25,
-0.8359375,
-0.828125,
1.15625,
2.328125,
-1.0078125,
-3.0,
4.09375,
0.8359375,
0.1015625,
2.015625,
-1.0859375
],
"index": 0,
"object": "embedding",
"raw_output": null
}
],
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"object": "list",
"usage": {
"prompt_tokens": 5,
"total_tokens": 5,
"completion_tokens": 0
},
"perf_metrics": null
}
},
"is_streaming": false
}
}
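
The dimensions parameter in this recording asks the server to return a 16-component embedding. A sketch of the call, assuming the Fireworks key lives in FIREWORKS_API_KEY:

import os
from openai import OpenAI

client = OpenAI(
    base_url="https://api.fireworks.ai/inference/v1",
    api_key=os.environ["FIREWORKS_API_KEY"],  # env var name is an assumption
)

resp = client.embeddings.create(
    model="accounts/fireworks/models/qwen3-embedding-8b",
    input="Test dimensions parameter",
    dimensions=16,              # server returns a 16-component vector
    encoding_format="float",
)

vector = resp.data[0].embedding
assert len(vector) == 16
print(vector[:4], resp.usage.prompt_tokens)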

View file

@@ -0,0 +1,39 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/embeddings",
"headers": {},
"body": {
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"input": "Test dimensions parameter",
"encoding_format": "base64",
"dimensions": 16
},
"endpoint": "/v1/embeddings",
"model": "accounts/fireworks/models/qwen3-embedding-8b"
},
"response": {
"body": {
"__type__": "openai.types.create_embedding_response.CreateEmbeddingResponse",
"__data__": {
"data": [
{
"embedding": "AABuvwAApkAAAAnAAAAwPgAAEMAAAFa/AABUvwAAlD8AABVAAACBvwAAQMAAAINAAABWPwAA0D0AAAFAAACLvw==",
"index": 0,
"object": "embedding",
"raw_output": null
}
],
"model": "accounts/fireworks/models/qwen3-embedding-8b",
"object": "list",
"usage": {
"prompt_tokens": 5,
"total_tokens": 5,
"completion_tokens": 0
},
"perf_metrics": null
}
},
"is_streaming": false
}
}
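
With encoding_format "base64" the server returns the same vector as the float recording above, just packed: the string is base64 over little-endian float32 values. A decoding sketch (the payload is copied from the response; decoding its first component reproduces -0.9296875 from the float recording):

import base64
import struct

payload = ("AABuvwAApkAAAAnAAAAwPgAAEMAAAFa/AABUvwAAlD8AABVAAACBvwAAQMAAAINA"
           "AABWPwAA0D0AAAFAAACLvw==")
raw = base64.b64decode(payload)
vector = struct.unpack(f"<{len(raw) // 4}f", raw)  # little-endian float32

assert len(vector) == 16        # matches the requested dimensions
assert vector[0] == -0.9296875  # first component of the float-format recording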

View file

@@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "Hello, world!"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "1d64ff81-b7c4-40c6-9509-cca71759da3e",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Hello! It's nice to meet you. Is there something I can help you with, or would you like to chat?",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758920401,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 26,
"prompt_tokens": 14,
"total_tokens": 40,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,74 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "What's the weather in Tokyo? Use the get_weather function to get the weather."
}
],
"stream": false,
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the weather in a given city",
"parameters": {
"type": "object",
"properties": {
"city": {
"type": "string",
"description": "The city to get the weather for"
}
}
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "0fe94e7d-f25b-4843-ba0a-e402e0764830",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "I can\u2019t help with that. If you're looking for current weather information, I recommend checking a weather website or app, such as AccuWeather or Weather.com. Is there anything else I can help you with?",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758920402,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 45,
"prompt_tokens": 27,
"total_tokens": 72,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large

View file

@@ -0,0 +1,55 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Test trace 0"
}
]
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-272",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "I'm happy to help you with a test. Since we are in the middle of a text-based conversation, I'll do my best to simulate a simple test tracing process.\n\n**Trace Test Results**\n\nTo perform this test, please follow these steps:\n\n1. Type \"test\" on command mode.\n2. Press Enter.\n\nNow, let's start tracing...\n\nTest Tracing Results:\nTest Case: General Functions\nTest Case Result: PASS\n\nSystem Response:\n\n```\n# System Boot Time: 2023-10-13T14:30:00\n# CPU Temperature: 35\u00b0C\n# Disk Space Available: 80%\n```\n\nNext Steps?\n\nType 'done' to exit the test, or 'run' for more tests.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758978134,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 152,
"prompt_tokens": 29,
"total_tokens": 181,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,44 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Return the exact same sentence and don't add additional words): Michael Jordan was born in the year of 1963",
"stop": "1963",
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "cmpl-183",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Michael Jordan was born in the year of "
}
],
"created": 1758978053,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 11,
"prompt_tokens": 48,
"total_tokens": 59,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}
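
This recording exercises the stop parameter: generation halts before the stop string "1963" is emitted, so the returned text ends at "the year of ". A sketch under the same assumed local setup (the prompt is copied verbatim from the recording):

from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="ollama")  # assumed local setup

completion = client.completions.create(
    model="llama3.2:3b-instruct-fp16",
    prompt=("Return the exact same sentence and don't add additional words): "
            "Michael Jordan was born in the year of 1963"),
    stop="1963",  # a list of strings also works, as in the companion recording below
    stream=False,
)

# The stop string itself is not included in the output.
print(completion.choices[0].text)  # "Michael Jordan was born in the year of "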

File diff suppressed because it is too large

View file

@@ -0,0 +1,112 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "system",
"content": "Pretend you are a weather assistant."
},
{
"role": "user",
"content": "What's the weather like in San Francisco, CA?"
}
],
"stream": true,
"tool_choice": "auto",
"tools": [
{
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": [
"location"
]
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-634",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": [
{
"index": 0,
"id": "call_wubm4yax",
"function": {
"arguments": "{\"location\":\"San Francisco, CA\"}",
"name": "get_weather"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758975115,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "chatcmpl-634",
"choices": [
{
"delta": {
"content": "",
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 1758975115,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": null
}
}
],
"is_streaming": true
}
}
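
When tool calls are streamed, fragments arrive keyed by index on delta.tool_calls and must be merged; this particular recording delivers the whole call in one chunk, but the general accumulation pattern is sketched below (same assumed local setup, same get_weather schema as the non-streaming recording):

from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:11434/v1", api_key="ollama")  # assumed local setup

tools = [{"type": "function", "function": {
    "name": "get_weather",
    "description": "Get the current weather",
    "parameters": {
        "type": "object",
        "properties": {"location": {
            "type": "string",
            "description": "The city and state (both required), e.g. San Francisco, CA.",
        }},
        "required": ["location"],
    },
}}]

stream = client.chat.completions.create(
    model="llama3.2:3b-instruct-fp16",
    messages=[
        {"role": "system", "content": "Pretend you are a weather assistant."},
        {"role": "user", "content": "What's the weather like in San Francisco, CA?"},
    ],
    tools=tools,
    tool_choice="auto",
    stream=True,
)

# Merge name/argument fragments per tool-call index until finish_reason fires.
calls = {}
for chunk in stream:
    choice = chunk.choices[0]
    for tc in choice.delta.tool_calls or []:
        slot = calls.setdefault(tc.index, {"name": "", "arguments": ""})
        if tc.function.name:
            slot["name"] += tc.function.name
        if tc.function.arguments:
            slot["arguments"] += tc.function.arguments
    if choice.finish_reason == "tool_calls":
        break

print(calls)  # {0: {'name': 'get_weather', 'arguments': '{"location":"San Francisco, CA"}'}}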

View file

@@ -0,0 +1,47 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"prompt": "Return the exact same sentence and don't add additional words): Michael Jordan was born in the year of 1963",
"stop": [
"blathering",
"1963"
],
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "cmpl-381",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"text": "Michael Jordan was born in the year of "
}
],
"created": 1758978056,
"model": "llama3.2:3b-instruct-fp16",
"object": "text_completion",
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 11,
"prompt_tokens": 48,
"total_tokens": 59,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large

View file

@ -0,0 +1,55 @@
{
"request": {
"method": "POST",
"url": "http://0.0.0.0:11434/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "llama3.2:3b-instruct-fp16",
"messages": [
{
"role": "user",
"content": "Test trace 1"
}
]
},
"endpoint": "/v1/chat/completions",
"model": "llama3.2:3b-instruct-fp16"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-122",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "It appears you're trying to initiate a conversation or test the functionality of this AI system. I'm happy to chat with you!\n\nWould you like to:\nA) Ask me a question on a specific topic\nB) Engage in a conversational dialogue on a topic of your choice\nC) Play a text-based game\nD) Test my language understanding capabilities\n\nPlease respond with the letter of your preferred activity.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758978142,
"model": "llama3.2:3b-instruct-fp16",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 85,
"prompt_tokens": 29,
"total_tokens": 114,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

File diff suppressed because it is too large

View file

@@ -0,0 +1,43 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
"stream": false,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "1bbb8db5-63e5-40cd-8ffe-59e0e88bf8f0",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"text": "4. At the beginning of the year, a woman has $5,000"
}
],
"created": 1758920353,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 16,
"prompt_tokens": 25,
"total_tokens": 41,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@@ -31,14 +31,14 @@
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "chatcmpl-489",
"id": "chatcmpl-51",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "The image is a photograph of a young golden retriever puppy from the chest up. The puppy faces the camera and its tongue is out of its mouth, as if to say hello. It appears to be between 1 and 3 months of age. It is a fluffy little golden retriever puppy with very little fat. Its fur is light blond and very fluffy. It has a small, round black nose. It is in front of a blurry background of warm yellows and greys.",
"content": "The image features a close-up of a golden retriever puppy with its mouth agape. The puppy has cream-color fur with golden patches on its big ears, which are held slightly out to the sides. Its dark eyes appear black from across a great distance, while the black nose is surrounded by white fur. The puppy's mouth is wide open, revealing a healthy pink tongue and what appears to be a green leaf (likely a blade of grass) stuck in its mouth. The puppy is facing the camera directly, with its paws pressed up tight against its body. The puppy is sitting in an open field with a golden brown grass carpet. The puppy appears happy. The image is well-compressed with great digital sharpness.",
"refusal": null,
"role": "assistant",
"annotations": null,
@@ -48,15 +48,15 @@
}
}
],
"created": 1758461767,
"created": 1756724768,
"model": "llama3.2-vision:11b",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": "fp_ollama",
"usage": {
"completion_tokens": 100,
"completion_tokens": 147,
"prompt_tokens": 18,
"total_tokens": 118,
"total_tokens": 165,
"completion_tokens_details": null,
"prompt_tokens_details": null
}

File diff suppressed because it is too large

View file

@@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "Which planet has rings around it with a name starting with letter S?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "26632ea9-3481-419d-bc0d-83c177257bc4",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "There are two planets in our solar system with ring systems that have names starting with the letter S:\n\n1. **Saturn** - Its ring system is one of the most iconic and well-known in our solar system. The rings are made up of ice and rock particles that range in size from tiny dust grains to massive boulders.\n2. **Saturn's moon** - The ring system of **Saturn's moon, Rhea**, is sometimes referred to as a \"ring system\" even though it's much smaller and less prominent than Saturn's. However, it's worth noting that Rhea's ring system is not as well-known as Saturn's.\n\nIf you're looking for a planet with a ring system that starts with the letter S and is not a moon, then the answer is Saturn!",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758920397,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 164,
"prompt_tokens": 24,
"total_tokens": 188,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@@ -0,0 +1,706 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "Hello, world!"
}
],
"stream": true
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": "Hello!",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " It",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": "'s",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " nice",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " to",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " meet",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": ".",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " Is",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " there",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " something",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " I",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " can",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " help",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " with",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": ",",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " or",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " would",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " you",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " like",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " to",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": " chat",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": "?",
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "d583f66e-de11-4210-8153-54be000a2783",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "stop",
"index": 0,
"logprobs": null
}
],
"created": 1758920391,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion.chunk",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 26,
"prompt_tokens": 14,
"total_tokens": 40,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}

View file

@@ -0,0 +1,996 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
"max_tokens": 50,
"stream": true,
"extra_body": {}
},
"endpoint": "/v1/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": [
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " a"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " type"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " __________________"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "_____"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\n\n"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "##"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Step"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " "
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "1"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ":"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Identify"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " the"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " type"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " flower"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " mentioned"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " in"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " the"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " sentence"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\n"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "The"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " sentence"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " mentions"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " \""
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "vio"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "lets"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\"\n\n"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "##"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Step"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " "
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "2"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ":"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " Determine"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " the"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " type"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " flower"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " v"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "io"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "lets"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " are"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": ".\n"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "V"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "io"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": "lets"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " are"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " a"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " type"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": null,
"index": 0,
"logprobs": null,
"text": " of"
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": null
}
},
{
"__type__": "openai.types.completion.Completion",
"__data__": {
"id": "c9c1f727-afe7-430a-b759-df1dc392266c",
"choices": [
{
"finish_reason": "length",
"index": 0,
"logprobs": null,
"text": ""
}
],
"created": 1758920354,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "text_completion",
"system_fingerprint": null,
"usage": {
"completion_tokens": 50,
"prompt_tokens": 25,
"total_tokens": 75,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
}
],
"is_streaming": true
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,56 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"messages": [
{
"role": "user",
"content": "Which planet do humans live on?"
}
],
"stream": false
},
"endpoint": "/v1/chat/completions",
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct"
},
"response": {
"body": {
"__type__": "openai.types.chat.chat_completion.ChatCompletion",
"__data__": {
"id": "0fd60cd7-dc72-45b7-808c-4da91de80093",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"logprobs": null,
"message": {
"content": "Humans live on a planet called Earth.",
"refusal": null,
"role": "assistant",
"annotations": null,
"audio": null,
"function_call": null,
"tool_calls": null
}
}
],
"created": 1758920388,
"model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"object": "chat.completion",
"service_tier": null,
"system_fingerprint": null,
"usage": {
"completion_tokens": 9,
"prompt_tokens": 17,
"total_tokens": 26,
"completion_tokens_details": null,
"prompt_tokens_details": null
}
}
},
"is_streaming": false
}
}

View file

@ -0,0 +1,527 @@
{
"request": {
"method": "POST",
"url": "https://api.fireworks.ai/inference/v1/v1/models",
"headers": {},
"body": {},
"endpoint": "/v1/models",
"model": ""
},
"response": {
"body": [
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-1-dev-fp8",
"created": 1729532889,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": false,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/tvergho-87e44d/models/debatecards-70b-ft-3epoch-dpo-v2",
"created": 1743381121,
"object": "model",
"owned_by": "tvergho-87e44d",
"kind": "HF_PEFT_ADDON",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-kontext-max",
"created": 1750714611,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-kontext-pro",
"created": 1750488264,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b",
"created": 1748467427,
"object": "model",
"owned_by": "sentientfoundation-serverless",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3",
"created": 1735576668,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new",
"created": 1739563474,
"object": "model",
"owned_by": "sentientfoundation",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/gpt-oss-120b",
"created": 1754345600,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct",
"created": 1753211090,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b-thinking-2507",
"created": 1753916446,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
"created": 1753124424,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b-thinking-2507",
"created": 1753455434,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-embedding-8b",
"created": 1755707090,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 40960
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3-0324",
"created": 1742827220,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3p1-terminus",
"created": 1758586241,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/kimi-k2-instruct",
"created": 1752259096,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/gpt-oss-20b",
"created": 1754345466,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama4-maverick-instruct-basic",
"created": 1743878495,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": true,
"context_length": 1048576
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct",
"created": 1754063588,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p3-70b-instruct",
"created": 1733442103,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen2p5-vl-32b-instruct",
"created": 1743392739,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": false,
"context_length": 128000
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-235b-a22b",
"created": 1745885249,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/glm-4p5-air",
"created": 1754089426,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1",
"created": 1737397673,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"created": 1721692808,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1-basic",
"created": 1742306746,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-v3p1",
"created": 1755758988,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/flux-1-schnell-fp8",
"created": 1729535376,
"object": "model",
"owned_by": "fireworks",
"kind": "FLUMINA_BASE_MODEL",
"supports_chat": false,
"supports_image_input": false,
"supports_tools": false
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/glm-4p5",
"created": 1753809636,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/kimi-k2-instruct-0905",
"created": 1757018994,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 262144
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-405b-instruct",
"created": 1721428386,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama4-scout-instruct-basic",
"created": 1743878279,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": true,
"supports_tools": true,
"context_length": 1048576
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b",
"created": 1745878133,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/llama-v3p1-70b-instruct",
"created": 1721287357,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 131072
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/deepseek-r1-0528",
"created": 1748456377,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 163840
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/mixtral-8x22b-instruct",
"created": 1713375508,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": true,
"context_length": 65536
}
},
{
"__type__": "openai.types.model.Model",
"__data__": {
"id": "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507",
"created": 1753808388,
"object": "model",
"owned_by": "fireworks",
"kind": "HF_BASE_MODEL",
"supports_chat": true,
"supports_image_input": false,
"supports_tools": false,
"context_length": 262144
}
}
],
"is_streaming": false
}
}

View file

@ -127,9 +127,8 @@ SETUP_DEFINITIONS: dict[str, Setup] = {
name="fireworks",
description="Fireworks provider with a text model",
defaults={
"text_model": "accounts/fireworks/models/llama-v3p1-8b-instruct",
"vision_model": "accounts/fireworks/models/llama-v3p2-90b-vision-instruct",
"embedding_model": "nomic-ai/nomic-embed-text-v1.5",
"text_model": "fireworks/accounts/fireworks/models/llama-v3p1-8b-instruct",
"embedding_model": "fireworks/accounts/fireworks/models/qwen3-embedding-8b",
},
),
}

View file

@ -32,8 +32,8 @@ def setup_telemetry_data(llama_stack_client, text_model_id):
)
for i in range(2):
llama_stack_client.inference.chat_completion(
model_id=text_model_id, messages=[{"role": "user", "content": f"Test trace {i}"}]
llama_stack_client.chat.completions.create(
model=text_model_id, messages=[{"role": "user", "content": f"Test trace {i}"}]
)
start_time = time.time()

View file

@ -83,12 +83,19 @@
],
"tools": [
{
"tool_name": "get_weather",
"description": "Get the current weather",
"parameters": {
"location": {
"param_type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": ["location"]
}
}
}
@ -116,12 +123,19 @@
],
"tools": [
{
"tool_name": "get_weather",
"description": "Get the current weather",
"parameters": {
"location": {
"param_type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": ["location"]
}
}
}
@ -162,12 +176,19 @@
],
"tools": [
{
"tool_name": "get_weather",
"description": "Get the current weather",
"parameters": {
"location": {
"param_type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": ["location"]
}
}
}
@ -192,66 +213,6 @@
]
}
},
"array_parameter": {
"data": {
"messages": [
[
{
"role": "user",
"content": "Please add a new product with name 'Widget', price 19.99, in stock, and tags ['new', 'sale'] and give me the product id."
}
]
],
"tools": [
{
"tool_name": "addProduct",
"description": "Get the current weather",
"parameters": {
"name": {
"param_type": "string",
"description": "Name of the product"
},
"price": {
"param_type": "number",
"description": "Price of the product"
},
"inStock": {
"param_type": "boolean",
"description": "Availability status of the product."
},
"tags": {
"param_type": "list[str]",
"description": "List of product tags"
}
}
}
],
"tool_responses": [
{
"response": "{'response': 'Successfully added product with id: 123'}"
}
],
"expected": [
{
"num_tool_calls": 1,
"tool_name": "addProduct",
"tool_arguments": {
"name": "Widget",
"price": 19.99,
"inStock": true,
"tags": [
"new",
"sale"
]
}
},
{
"num_tool_calls": 0,
"answer": "123"
}
]
}
},
"sample_messages_tool_calling": {
"data": {
"messages": [
@ -270,13 +231,19 @@
],
"tools": [
{
"tool_name": "get_weather",
"description": "Get the current weather",
"parameters": {
"location": {
"param_type": "string",
"description": "The city and state, e.g. San Francisco, CA",
"required": true
"type": "function",
"function": {
"name": "get_weather",
"description": "Get the current weather",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city and state (both required), e.g. San Francisco, CA."
}
},
"required": ["location"]
}
}
}
@ -343,18 +310,23 @@
],
"tools": [
{
"tool_name": "get_object_namespace_list",
"description": "Get the list of objects in a namespace",
"parameters": {
"kind": {
"param_type": "string",
"description": "the type of object",
"required": true
},
"namespace": {
"param_type": "string",
"description": "the name of the namespace",
"required": true
"type": "function",
"function": {
"name": "get_object_namespace_list",
"description": "Get the list of objects in a namespace",
"parameters": {
"type": "object",
"properties": {
"kind": {
"type": "string",
"description": "the type of object"
},
"namespace": {
"type": "string",
"description": "the name of the namespace"
}
},
"required": ["kind", "namespace"]
}
}
}