mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-10-12 05:54:38 +00:00
feat: support passing "extra body" through to providers
# What does this PR do? Allows passing through extra_body parameters to inference providers. closes #2720 ## Test Plan CI and added new test
This commit is contained in:
parent
80d58ab519
commit
28dff0b89e
37 changed files with 2964 additions and 200 deletions
23
docs/static/deprecated-llama-stack-spec.html
vendored
23
docs/static/deprecated-llama-stack-spec.html
vendored
|
@ -1527,7 +1527,7 @@
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/components/schemas/OpenAIChatCompletionRequest"
|
"$ref": "#/components/schemas/OpenAIChatCompletionRequestWithExtraBody"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -1617,7 +1617,7 @@
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/components/schemas/OpenAICompletionRequest"
|
"$ref": "#/components/schemas/OpenAICompletionRequestWithExtraBody"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -7522,7 +7522,7 @@
|
||||||
"title": "OpenAIResponseFormatText",
|
"title": "OpenAIResponseFormatText",
|
||||||
"description": "Text response format for OpenAI-compatible chat completion requests."
|
"description": "Text response format for OpenAI-compatible chat completion requests."
|
||||||
},
|
},
|
||||||
"OpenAIChatCompletionRequest": {
|
"OpenAIChatCompletionRequestWithExtraBody": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"model": {
|
"model": {
|
||||||
|
@ -7769,7 +7769,7 @@
|
||||||
"model",
|
"model",
|
||||||
"messages"
|
"messages"
|
||||||
],
|
],
|
||||||
"title": "OpenAIChatCompletionRequest",
|
"title": "OpenAIChatCompletionRequestWithExtraBody",
|
||||||
"description": "Request parameters for OpenAI-compatible chat completion endpoint."
|
"description": "Request parameters for OpenAI-compatible chat completion endpoint."
|
||||||
},
|
},
|
||||||
"OpenAIChatCompletion": {
|
"OpenAIChatCompletion": {
|
||||||
|
@ -7966,7 +7966,7 @@
|
||||||
],
|
],
|
||||||
"title": "OpenAICompletionWithInputMessages"
|
"title": "OpenAICompletionWithInputMessages"
|
||||||
},
|
},
|
||||||
"OpenAICompletionRequest": {
|
"OpenAICompletionRequestWithExtraBody": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"model": {
|
"model": {
|
||||||
|
@ -8097,17 +8097,6 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "(Optional) The user to use."
|
"description": "(Optional) The user to use."
|
||||||
},
|
},
|
||||||
"guided_choice": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"description": "(Optional) vLLM-specific parameter for guided generation with a list of choices."
|
|
||||||
},
|
|
||||||
"prompt_logprobs": {
|
|
||||||
"type": "integer",
|
|
||||||
"description": "(Optional) vLLM-specific parameter for number of log probabilities to return for prompt tokens."
|
|
||||||
},
|
|
||||||
"suffix": {
|
"suffix": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "(Optional) The suffix that should be appended to the completion."
|
"description": "(Optional) The suffix that should be appended to the completion."
|
||||||
|
@ -8118,7 +8107,7 @@
|
||||||
"model",
|
"model",
|
||||||
"prompt"
|
"prompt"
|
||||||
],
|
],
|
||||||
"title": "OpenAICompletionRequest",
|
"title": "OpenAICompletionRequestWithExtraBody",
|
||||||
"description": "Request parameters for OpenAI-compatible completion endpoint."
|
"description": "Request parameters for OpenAI-compatible completion endpoint."
|
||||||
},
|
},
|
||||||
"OpenAICompletion": {
|
"OpenAICompletion": {
|
||||||
|
|
24
docs/static/deprecated-llama-stack-spec.yaml
vendored
24
docs/static/deprecated-llama-stack-spec.yaml
vendored
|
@ -1098,7 +1098,7 @@ paths:
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/OpenAIChatCompletionRequest'
|
$ref: '#/components/schemas/OpenAIChatCompletionRequestWithExtraBody'
|
||||||
required: true
|
required: true
|
||||||
deprecated: true
|
deprecated: true
|
||||||
/v1/openai/v1/chat/completions/{completion_id}:
|
/v1/openai/v1/chat/completions/{completion_id}:
|
||||||
|
@ -1167,7 +1167,7 @@ paths:
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/OpenAICompletionRequest'
|
$ref: '#/components/schemas/OpenAICompletionRequestWithExtraBody'
|
||||||
required: true
|
required: true
|
||||||
deprecated: true
|
deprecated: true
|
||||||
/v1/openai/v1/embeddings:
|
/v1/openai/v1/embeddings:
|
||||||
|
@ -5575,7 +5575,7 @@ components:
|
||||||
title: OpenAIResponseFormatText
|
title: OpenAIResponseFormatText
|
||||||
description: >-
|
description: >-
|
||||||
Text response format for OpenAI-compatible chat completion requests.
|
Text response format for OpenAI-compatible chat completion requests.
|
||||||
OpenAIChatCompletionRequest:
|
OpenAIChatCompletionRequestWithExtraBody:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
model:
|
model:
|
||||||
|
@ -5717,7 +5717,7 @@ components:
|
||||||
required:
|
required:
|
||||||
- model
|
- model
|
||||||
- messages
|
- messages
|
||||||
title: OpenAIChatCompletionRequest
|
title: OpenAIChatCompletionRequestWithExtraBody
|
||||||
description: >-
|
description: >-
|
||||||
Request parameters for OpenAI-compatible chat completion endpoint.
|
Request parameters for OpenAI-compatible chat completion endpoint.
|
||||||
OpenAIChatCompletion:
|
OpenAIChatCompletion:
|
||||||
|
@ -5885,7 +5885,7 @@ components:
|
||||||
- model
|
- model
|
||||||
- input_messages
|
- input_messages
|
||||||
title: OpenAICompletionWithInputMessages
|
title: OpenAICompletionWithInputMessages
|
||||||
OpenAICompletionRequest:
|
OpenAICompletionRequestWithExtraBody:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
model:
|
model:
|
||||||
|
@ -5973,18 +5973,6 @@ components:
|
||||||
user:
|
user:
|
||||||
type: string
|
type: string
|
||||||
description: (Optional) The user to use.
|
description: (Optional) The user to use.
|
||||||
guided_choice:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
(Optional) vLLM-specific parameter for guided generation with a list of
|
|
||||||
choices.
|
|
||||||
prompt_logprobs:
|
|
||||||
type: integer
|
|
||||||
description: >-
|
|
||||||
(Optional) vLLM-specific parameter for number of log probabilities to
|
|
||||||
return for prompt tokens.
|
|
||||||
suffix:
|
suffix:
|
||||||
type: string
|
type: string
|
||||||
description: >-
|
description: >-
|
||||||
|
@ -5993,7 +5981,7 @@ components:
|
||||||
required:
|
required:
|
||||||
- model
|
- model
|
||||||
- prompt
|
- prompt
|
||||||
title: OpenAICompletionRequest
|
title: OpenAICompletionRequestWithExtraBody
|
||||||
description: >-
|
description: >-
|
||||||
Request parameters for OpenAI-compatible completion endpoint.
|
Request parameters for OpenAI-compatible completion endpoint.
|
||||||
OpenAICompletion:
|
OpenAICompletion:
|
||||||
|
|
23
docs/static/llama-stack-spec.html
vendored
23
docs/static/llama-stack-spec.html
vendored
|
@ -153,7 +153,7 @@
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/components/schemas/OpenAIChatCompletionRequest"
|
"$ref": "#/components/schemas/OpenAIChatCompletionRequestWithExtraBody"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -243,7 +243,7 @@
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/components/schemas/OpenAICompletionRequest"
|
"$ref": "#/components/schemas/OpenAICompletionRequestWithExtraBody"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -5018,7 +5018,7 @@
|
||||||
"title": "OpenAIResponseFormatText",
|
"title": "OpenAIResponseFormatText",
|
||||||
"description": "Text response format for OpenAI-compatible chat completion requests."
|
"description": "Text response format for OpenAI-compatible chat completion requests."
|
||||||
},
|
},
|
||||||
"OpenAIChatCompletionRequest": {
|
"OpenAIChatCompletionRequestWithExtraBody": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"model": {
|
"model": {
|
||||||
|
@ -5265,7 +5265,7 @@
|
||||||
"model",
|
"model",
|
||||||
"messages"
|
"messages"
|
||||||
],
|
],
|
||||||
"title": "OpenAIChatCompletionRequest",
|
"title": "OpenAIChatCompletionRequestWithExtraBody",
|
||||||
"description": "Request parameters for OpenAI-compatible chat completion endpoint."
|
"description": "Request parameters for OpenAI-compatible chat completion endpoint."
|
||||||
},
|
},
|
||||||
"OpenAIChatCompletion": {
|
"OpenAIChatCompletion": {
|
||||||
|
@ -5462,7 +5462,7 @@
|
||||||
],
|
],
|
||||||
"title": "OpenAICompletionWithInputMessages"
|
"title": "OpenAICompletionWithInputMessages"
|
||||||
},
|
},
|
||||||
"OpenAICompletionRequest": {
|
"OpenAICompletionRequestWithExtraBody": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"model": {
|
"model": {
|
||||||
|
@ -5593,17 +5593,6 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "(Optional) The user to use."
|
"description": "(Optional) The user to use."
|
||||||
},
|
},
|
||||||
"guided_choice": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"description": "(Optional) vLLM-specific parameter for guided generation with a list of choices."
|
|
||||||
},
|
|
||||||
"prompt_logprobs": {
|
|
||||||
"type": "integer",
|
|
||||||
"description": "(Optional) vLLM-specific parameter for number of log probabilities to return for prompt tokens."
|
|
||||||
},
|
|
||||||
"suffix": {
|
"suffix": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "(Optional) The suffix that should be appended to the completion."
|
"description": "(Optional) The suffix that should be appended to the completion."
|
||||||
|
@ -5614,7 +5603,7 @@
|
||||||
"model",
|
"model",
|
||||||
"prompt"
|
"prompt"
|
||||||
],
|
],
|
||||||
"title": "OpenAICompletionRequest",
|
"title": "OpenAICompletionRequestWithExtraBody",
|
||||||
"description": "Request parameters for OpenAI-compatible completion endpoint."
|
"description": "Request parameters for OpenAI-compatible completion endpoint."
|
||||||
},
|
},
|
||||||
"OpenAICompletion": {
|
"OpenAICompletion": {
|
||||||
|
|
24
docs/static/llama-stack-spec.yaml
vendored
24
docs/static/llama-stack-spec.yaml
vendored
|
@ -98,7 +98,7 @@ paths:
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/OpenAIChatCompletionRequest'
|
$ref: '#/components/schemas/OpenAIChatCompletionRequestWithExtraBody'
|
||||||
required: true
|
required: true
|
||||||
deprecated: false
|
deprecated: false
|
||||||
/v1/chat/completions/{completion_id}:
|
/v1/chat/completions/{completion_id}:
|
||||||
|
@ -167,7 +167,7 @@ paths:
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/OpenAICompletionRequest'
|
$ref: '#/components/schemas/OpenAICompletionRequestWithExtraBody'
|
||||||
required: true
|
required: true
|
||||||
deprecated: false
|
deprecated: false
|
||||||
/v1/conversations:
|
/v1/conversations:
|
||||||
|
@ -3824,7 +3824,7 @@ components:
|
||||||
title: OpenAIResponseFormatText
|
title: OpenAIResponseFormatText
|
||||||
description: >-
|
description: >-
|
||||||
Text response format for OpenAI-compatible chat completion requests.
|
Text response format for OpenAI-compatible chat completion requests.
|
||||||
OpenAIChatCompletionRequest:
|
OpenAIChatCompletionRequestWithExtraBody:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
model:
|
model:
|
||||||
|
@ -3966,7 +3966,7 @@ components:
|
||||||
required:
|
required:
|
||||||
- model
|
- model
|
||||||
- messages
|
- messages
|
||||||
title: OpenAIChatCompletionRequest
|
title: OpenAIChatCompletionRequestWithExtraBody
|
||||||
description: >-
|
description: >-
|
||||||
Request parameters for OpenAI-compatible chat completion endpoint.
|
Request parameters for OpenAI-compatible chat completion endpoint.
|
||||||
OpenAIChatCompletion:
|
OpenAIChatCompletion:
|
||||||
|
@ -4134,7 +4134,7 @@ components:
|
||||||
- model
|
- model
|
||||||
- input_messages
|
- input_messages
|
||||||
title: OpenAICompletionWithInputMessages
|
title: OpenAICompletionWithInputMessages
|
||||||
OpenAICompletionRequest:
|
OpenAICompletionRequestWithExtraBody:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
model:
|
model:
|
||||||
|
@ -4222,18 +4222,6 @@ components:
|
||||||
user:
|
user:
|
||||||
type: string
|
type: string
|
||||||
description: (Optional) The user to use.
|
description: (Optional) The user to use.
|
||||||
guided_choice:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
(Optional) vLLM-specific parameter for guided generation with a list of
|
|
||||||
choices.
|
|
||||||
prompt_logprobs:
|
|
||||||
type: integer
|
|
||||||
description: >-
|
|
||||||
(Optional) vLLM-specific parameter for number of log probabilities to
|
|
||||||
return for prompt tokens.
|
|
||||||
suffix:
|
suffix:
|
||||||
type: string
|
type: string
|
||||||
description: >-
|
description: >-
|
||||||
|
@ -4242,7 +4230,7 @@ components:
|
||||||
required:
|
required:
|
||||||
- model
|
- model
|
||||||
- prompt
|
- prompt
|
||||||
title: OpenAICompletionRequest
|
title: OpenAICompletionRequestWithExtraBody
|
||||||
description: >-
|
description: >-
|
||||||
Request parameters for OpenAI-compatible completion endpoint.
|
Request parameters for OpenAI-compatible completion endpoint.
|
||||||
OpenAICompletion:
|
OpenAICompletion:
|
||||||
|
|
23
docs/static/stainless-llama-stack-spec.html
vendored
23
docs/static/stainless-llama-stack-spec.html
vendored
|
@ -153,7 +153,7 @@
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/components/schemas/OpenAIChatCompletionRequest"
|
"$ref": "#/components/schemas/OpenAIChatCompletionRequestWithExtraBody"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -243,7 +243,7 @@
|
||||||
"content": {
|
"content": {
|
||||||
"application/json": {
|
"application/json": {
|
||||||
"schema": {
|
"schema": {
|
||||||
"$ref": "#/components/schemas/OpenAICompletionRequest"
|
"$ref": "#/components/schemas/OpenAICompletionRequestWithExtraBody"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -7027,7 +7027,7 @@
|
||||||
"title": "OpenAIResponseFormatText",
|
"title": "OpenAIResponseFormatText",
|
||||||
"description": "Text response format for OpenAI-compatible chat completion requests."
|
"description": "Text response format for OpenAI-compatible chat completion requests."
|
||||||
},
|
},
|
||||||
"OpenAIChatCompletionRequest": {
|
"OpenAIChatCompletionRequestWithExtraBody": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"model": {
|
"model": {
|
||||||
|
@ -7274,7 +7274,7 @@
|
||||||
"model",
|
"model",
|
||||||
"messages"
|
"messages"
|
||||||
],
|
],
|
||||||
"title": "OpenAIChatCompletionRequest",
|
"title": "OpenAIChatCompletionRequestWithExtraBody",
|
||||||
"description": "Request parameters for OpenAI-compatible chat completion endpoint."
|
"description": "Request parameters for OpenAI-compatible chat completion endpoint."
|
||||||
},
|
},
|
||||||
"OpenAIChatCompletion": {
|
"OpenAIChatCompletion": {
|
||||||
|
@ -7471,7 +7471,7 @@
|
||||||
],
|
],
|
||||||
"title": "OpenAICompletionWithInputMessages"
|
"title": "OpenAICompletionWithInputMessages"
|
||||||
},
|
},
|
||||||
"OpenAICompletionRequest": {
|
"OpenAICompletionRequestWithExtraBody": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"model": {
|
"model": {
|
||||||
|
@ -7602,17 +7602,6 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "(Optional) The user to use."
|
"description": "(Optional) The user to use."
|
||||||
},
|
},
|
||||||
"guided_choice": {
|
|
||||||
"type": "array",
|
|
||||||
"items": {
|
|
||||||
"type": "string"
|
|
||||||
},
|
|
||||||
"description": "(Optional) vLLM-specific parameter for guided generation with a list of choices."
|
|
||||||
},
|
|
||||||
"prompt_logprobs": {
|
|
||||||
"type": "integer",
|
|
||||||
"description": "(Optional) vLLM-specific parameter for number of log probabilities to return for prompt tokens."
|
|
||||||
},
|
|
||||||
"suffix": {
|
"suffix": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "(Optional) The suffix that should be appended to the completion."
|
"description": "(Optional) The suffix that should be appended to the completion."
|
||||||
|
@ -7623,7 +7612,7 @@
|
||||||
"model",
|
"model",
|
||||||
"prompt"
|
"prompt"
|
||||||
],
|
],
|
||||||
"title": "OpenAICompletionRequest",
|
"title": "OpenAICompletionRequestWithExtraBody",
|
||||||
"description": "Request parameters for OpenAI-compatible completion endpoint."
|
"description": "Request parameters for OpenAI-compatible completion endpoint."
|
||||||
},
|
},
|
||||||
"OpenAICompletion": {
|
"OpenAICompletion": {
|
||||||
|
|
24
docs/static/stainless-llama-stack-spec.yaml
vendored
24
docs/static/stainless-llama-stack-spec.yaml
vendored
|
@ -101,7 +101,7 @@ paths:
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/OpenAIChatCompletionRequest'
|
$ref: '#/components/schemas/OpenAIChatCompletionRequestWithExtraBody'
|
||||||
required: true
|
required: true
|
||||||
deprecated: false
|
deprecated: false
|
||||||
/v1/chat/completions/{completion_id}:
|
/v1/chat/completions/{completion_id}:
|
||||||
|
@ -170,7 +170,7 @@ paths:
|
||||||
content:
|
content:
|
||||||
application/json:
|
application/json:
|
||||||
schema:
|
schema:
|
||||||
$ref: '#/components/schemas/OpenAICompletionRequest'
|
$ref: '#/components/schemas/OpenAICompletionRequestWithExtraBody'
|
||||||
required: true
|
required: true
|
||||||
deprecated: false
|
deprecated: false
|
||||||
/v1/conversations:
|
/v1/conversations:
|
||||||
|
@ -5269,7 +5269,7 @@ components:
|
||||||
title: OpenAIResponseFormatText
|
title: OpenAIResponseFormatText
|
||||||
description: >-
|
description: >-
|
||||||
Text response format for OpenAI-compatible chat completion requests.
|
Text response format for OpenAI-compatible chat completion requests.
|
||||||
OpenAIChatCompletionRequest:
|
OpenAIChatCompletionRequestWithExtraBody:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
model:
|
model:
|
||||||
|
@ -5411,7 +5411,7 @@ components:
|
||||||
required:
|
required:
|
||||||
- model
|
- model
|
||||||
- messages
|
- messages
|
||||||
title: OpenAIChatCompletionRequest
|
title: OpenAIChatCompletionRequestWithExtraBody
|
||||||
description: >-
|
description: >-
|
||||||
Request parameters for OpenAI-compatible chat completion endpoint.
|
Request parameters for OpenAI-compatible chat completion endpoint.
|
||||||
OpenAIChatCompletion:
|
OpenAIChatCompletion:
|
||||||
|
@ -5579,7 +5579,7 @@ components:
|
||||||
- model
|
- model
|
||||||
- input_messages
|
- input_messages
|
||||||
title: OpenAICompletionWithInputMessages
|
title: OpenAICompletionWithInputMessages
|
||||||
OpenAICompletionRequest:
|
OpenAICompletionRequestWithExtraBody:
|
||||||
type: object
|
type: object
|
||||||
properties:
|
properties:
|
||||||
model:
|
model:
|
||||||
|
@ -5667,18 +5667,6 @@ components:
|
||||||
user:
|
user:
|
||||||
type: string
|
type: string
|
||||||
description: (Optional) The user to use.
|
description: (Optional) The user to use.
|
||||||
guided_choice:
|
|
||||||
type: array
|
|
||||||
items:
|
|
||||||
type: string
|
|
||||||
description: >-
|
|
||||||
(Optional) vLLM-specific parameter for guided generation with a list of
|
|
||||||
choices.
|
|
||||||
prompt_logprobs:
|
|
||||||
type: integer
|
|
||||||
description: >-
|
|
||||||
(Optional) vLLM-specific parameter for number of log probabilities to
|
|
||||||
return for prompt tokens.
|
|
||||||
suffix:
|
suffix:
|
||||||
type: string
|
type: string
|
||||||
description: >-
|
description: >-
|
||||||
|
@ -5687,7 +5675,7 @@ components:
|
||||||
required:
|
required:
|
||||||
- model
|
- model
|
||||||
- prompt
|
- prompt
|
||||||
title: OpenAICompletionRequest
|
title: OpenAICompletionRequestWithExtraBody
|
||||||
description: >-
|
description: >-
|
||||||
Request parameters for OpenAI-compatible completion endpoint.
|
Request parameters for OpenAI-compatible completion endpoint.
|
||||||
OpenAICompletion:
|
OpenAICompletion:
|
||||||
|
|
|
@ -15,7 +15,7 @@ from typing import (
|
||||||
)
|
)
|
||||||
|
|
||||||
from fastapi import Body
|
from fastapi import Body
|
||||||
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
from pydantic import BaseModel, Field, field_validator
|
||||||
from typing_extensions import TypedDict
|
from typing_extensions import TypedDict
|
||||||
|
|
||||||
from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent
|
from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent
|
||||||
|
@ -1036,8 +1036,9 @@ class ListOpenAIChatCompletionResponse(BaseModel):
|
||||||
object: Literal["list"] = "list"
|
object: Literal["list"] = "list"
|
||||||
|
|
||||||
|
|
||||||
|
# extra_body can be accessed via .model_extra
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class OpenAICompletionRequest(BaseModel):
|
class OpenAICompletionRequestWithExtraBody(BaseModel, extra="allow"):
|
||||||
"""Request parameters for OpenAI-compatible completion endpoint.
|
"""Request parameters for OpenAI-compatible completion endpoint.
|
||||||
|
|
||||||
:param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
|
:param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
|
||||||
|
@ -1058,12 +1059,8 @@ class OpenAICompletionRequest(BaseModel):
|
||||||
:param top_p: (Optional) The top p to use.
|
:param top_p: (Optional) The top p to use.
|
||||||
:param user: (Optional) The user to use.
|
:param user: (Optional) The user to use.
|
||||||
:param suffix: (Optional) The suffix that should be appended to the completion.
|
:param suffix: (Optional) The suffix that should be appended to the completion.
|
||||||
:param guided_choice: (Optional) vLLM-specific parameter for guided generation with a list of choices.
|
|
||||||
:param prompt_logprobs: (Optional) vLLM-specific parameter for number of log probabilities to return for prompt tokens.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
model_config = ConfigDict(extra="allow")
|
|
||||||
|
|
||||||
# Standard OpenAI completion parameters
|
# Standard OpenAI completion parameters
|
||||||
model: str
|
model: str
|
||||||
prompt: str | list[str] | list[int] | list[list[int]]
|
prompt: str | list[str] | list[int] | list[list[int]]
|
||||||
|
@ -1082,17 +1079,12 @@ class OpenAICompletionRequest(BaseModel):
|
||||||
temperature: float | None = None
|
temperature: float | None = None
|
||||||
top_p: float | None = None
|
top_p: float | None = None
|
||||||
user: str | None = None
|
user: str | None = None
|
||||||
|
|
||||||
# vLLM-specific parameters (documented here but also allowed via extra fields)
|
|
||||||
guided_choice: list[str] | None = None
|
|
||||||
prompt_logprobs: int | None = None
|
|
||||||
|
|
||||||
# for fill-in-the-middle type completion
|
|
||||||
suffix: str | None = None
|
suffix: str | None = None
|
||||||
|
|
||||||
|
|
||||||
|
# extra_body can be accessed via .model_extra
|
||||||
@json_schema_type
|
@json_schema_type
|
||||||
class OpenAIChatCompletionRequest(BaseModel):
|
class OpenAIChatCompletionRequestWithExtraBody(BaseModel, extra="allow"):
|
||||||
"""Request parameters for OpenAI-compatible chat completion endpoint.
|
"""Request parameters for OpenAI-compatible chat completion endpoint.
|
||||||
|
|
||||||
:param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
|
:param model: The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint.
|
||||||
|
@ -1120,8 +1112,6 @@ class OpenAIChatCompletionRequest(BaseModel):
|
||||||
:param user: (Optional) The user to use.
|
:param user: (Optional) The user to use.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
model_config = ConfigDict(extra="allow")
|
|
||||||
|
|
||||||
# Standard OpenAI chat completion parameters
|
# Standard OpenAI chat completion parameters
|
||||||
model: str
|
model: str
|
||||||
messages: Annotated[list[OpenAIMessageParam], Field(..., min_length=1)]
|
messages: Annotated[list[OpenAIMessageParam], Field(..., min_length=1)]
|
||||||
|
@ -1182,7 +1172,7 @@ class InferenceProvider(Protocol):
|
||||||
@webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1)
|
||||||
async def openai_completion(
|
async def openai_completion(
|
||||||
self,
|
self,
|
||||||
params: Annotated[OpenAICompletionRequest, Body(...)],
|
params: Annotated[OpenAICompletionRequestWithExtraBody, Body(...)],
|
||||||
) -> OpenAICompletion:
|
) -> OpenAICompletion:
|
||||||
"""Create completion.
|
"""Create completion.
|
||||||
|
|
||||||
|
@ -1195,7 +1185,7 @@ class InferenceProvider(Protocol):
|
||||||
@webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
|
@webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
|
||||||
async def openai_chat_completion(
|
async def openai_chat_completion(
|
||||||
self,
|
self,
|
||||||
params: Annotated[OpenAIChatCompletionRequest, Body(...)],
|
params: Annotated[OpenAIChatCompletionRequestWithExtraBody, Body(...)],
|
||||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||||
"""Create chat completions.
|
"""Create chat completions.
|
||||||
|
|
||||||
|
|
|
@ -32,13 +32,13 @@ from llama_stack.apis.inference import (
|
||||||
OpenAIAssistantMessageParam,
|
OpenAIAssistantMessageParam,
|
||||||
OpenAIChatCompletion,
|
OpenAIChatCompletion,
|
||||||
OpenAIChatCompletionChunk,
|
OpenAIChatCompletionChunk,
|
||||||
OpenAIChatCompletionRequest,
|
OpenAIChatCompletionRequestWithExtraBody,
|
||||||
OpenAIChatCompletionToolCall,
|
OpenAIChatCompletionToolCall,
|
||||||
OpenAIChatCompletionToolCallFunction,
|
OpenAIChatCompletionToolCallFunction,
|
||||||
OpenAIChoice,
|
OpenAIChoice,
|
||||||
OpenAIChoiceLogprobs,
|
OpenAIChoiceLogprobs,
|
||||||
OpenAICompletion,
|
OpenAICompletion,
|
||||||
OpenAICompletionRequest,
|
OpenAICompletionRequestWithExtraBody,
|
||||||
OpenAICompletionWithInputMessages,
|
OpenAICompletionWithInputMessages,
|
||||||
OpenAIEmbeddingsResponse,
|
OpenAIEmbeddingsResponse,
|
||||||
OpenAIMessageParam,
|
OpenAIMessageParam,
|
||||||
|
@ -183,7 +183,7 @@ class InferenceRouter(Inference):
|
||||||
|
|
||||||
async def openai_completion(
|
async def openai_completion(
|
||||||
self,
|
self,
|
||||||
params: Annotated[OpenAICompletionRequest, Body(...)],
|
params: Annotated[OpenAICompletionRequestWithExtraBody, Body(...)],
|
||||||
) -> OpenAICompletion:
|
) -> OpenAICompletion:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"InferenceRouter.openai_completion: model={params.model}, stream={params.stream}, prompt={params.prompt}",
|
f"InferenceRouter.openai_completion: model={params.model}, stream={params.stream}, prompt={params.prompt}",
|
||||||
|
@ -218,7 +218,7 @@ class InferenceRouter(Inference):
|
||||||
|
|
||||||
async def openai_chat_completion(
|
async def openai_chat_completion(
|
||||||
self,
|
self,
|
||||||
params: Annotated[OpenAIChatCompletionRequest, Body(...)],
|
params: Annotated[OpenAIChatCompletionRequestWithExtraBody, Body(...)],
|
||||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"InferenceRouter.openai_chat_completion: model={params.model}, stream={params.stream}, messages={params.messages}",
|
f"InferenceRouter.openai_chat_completion: model={params.model}, stream={params.stream}, messages={params.messages}",
|
||||||
|
@ -317,7 +317,7 @@ class InferenceRouter(Inference):
|
||||||
raise NotImplementedError("Get chat completion is not supported: inference store is not configured.")
|
raise NotImplementedError("Get chat completion is not supported: inference store is not configured.")
|
||||||
|
|
||||||
async def _nonstream_openai_chat_completion(
|
async def _nonstream_openai_chat_completion(
|
||||||
self, provider: Inference, params: OpenAIChatCompletionRequest
|
self, provider: Inference, params: OpenAIChatCompletionRequestWithExtraBody
|
||||||
) -> OpenAIChatCompletion:
|
) -> OpenAIChatCompletion:
|
||||||
response = await provider.openai_chat_completion(params)
|
response = await provider.openai_chat_completion(params)
|
||||||
for choice in response.choices:
|
for choice in response.choices:
|
||||||
|
|
|
@ -49,7 +49,7 @@ from llama_stack.apis.inference import (
|
||||||
Inference,
|
Inference,
|
||||||
Message,
|
Message,
|
||||||
OpenAIAssistantMessageParam,
|
OpenAIAssistantMessageParam,
|
||||||
OpenAIChatCompletionRequest,
|
OpenAIChatCompletionRequestWithExtraBody,
|
||||||
OpenAIDeveloperMessageParam,
|
OpenAIDeveloperMessageParam,
|
||||||
OpenAIMessageParam,
|
OpenAIMessageParam,
|
||||||
OpenAISystemMessageParam,
|
OpenAISystemMessageParam,
|
||||||
|
@ -583,7 +583,7 @@ class ChatAgent(ShieldRunnerMixin):
|
||||||
max_tokens = getattr(sampling_params, "max_tokens", None)
|
max_tokens = getattr(sampling_params, "max_tokens", None)
|
||||||
|
|
||||||
# Use OpenAI chat completion
|
# Use OpenAI chat completion
|
||||||
params = OpenAIChatCompletionRequest(
|
params = OpenAIChatCompletionRequestWithExtraBody(
|
||||||
model=self.agent_config.model,
|
model=self.agent_config.model,
|
||||||
messages=openai_messages,
|
messages=openai_messages,
|
||||||
tools=openai_tools if openai_tools else None,
|
tools=openai_tools if openai_tools else None,
|
||||||
|
|
|
@ -49,7 +49,7 @@ from llama_stack.apis.inference import (
|
||||||
OpenAIAssistantMessageParam,
|
OpenAIAssistantMessageParam,
|
||||||
OpenAIChatCompletion,
|
OpenAIChatCompletion,
|
||||||
OpenAIChatCompletionChunk,
|
OpenAIChatCompletionChunk,
|
||||||
OpenAIChatCompletionRequest,
|
OpenAIChatCompletionRequestWithExtraBody,
|
||||||
OpenAIChatCompletionToolCall,
|
OpenAIChatCompletionToolCall,
|
||||||
OpenAIChoice,
|
OpenAIChoice,
|
||||||
OpenAIMessageParam,
|
OpenAIMessageParam,
|
||||||
|
@ -169,7 +169,7 @@ class StreamingResponseOrchestrator:
|
||||||
# (some providers don't support non-empty response_format when tools are present)
|
# (some providers don't support non-empty response_format when tools are present)
|
||||||
response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format
|
response_format = None if self.ctx.response_format.type == "text" else self.ctx.response_format
|
||||||
logger.debug(f"calling openai_chat_completion with tools: {self.ctx.chat_tools}")
|
logger.debug(f"calling openai_chat_completion with tools: {self.ctx.chat_tools}")
|
||||||
params = OpenAIChatCompletionRequest(
|
params = OpenAIChatCompletionRequestWithExtraBody(
|
||||||
model=self.ctx.model,
|
model=self.ctx.model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
tools=self.ctx.chat_tools,
|
tools=self.ctx.chat_tools,
|
||||||
|
|
|
@ -22,8 +22,8 @@ from llama_stack.apis.files import Files, OpenAIFilePurpose
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import (
|
||||||
Inference,
|
Inference,
|
||||||
OpenAIAssistantMessageParam,
|
OpenAIAssistantMessageParam,
|
||||||
OpenAIChatCompletionRequest,
|
OpenAIChatCompletionRequestWithExtraBody,
|
||||||
OpenAICompletionRequest,
|
OpenAICompletionRequestWithExtraBody,
|
||||||
OpenAIDeveloperMessageParam,
|
OpenAIDeveloperMessageParam,
|
||||||
OpenAIMessageParam,
|
OpenAIMessageParam,
|
||||||
OpenAISystemMessageParam,
|
OpenAISystemMessageParam,
|
||||||
|
@ -608,7 +608,7 @@ class ReferenceBatchesImpl(Batches):
|
||||||
# TODO(SECURITY): review body for security issues
|
# TODO(SECURITY): review body for security issues
|
||||||
if request.url == "/v1/chat/completions":
|
if request.url == "/v1/chat/completions":
|
||||||
request.body["messages"] = [convert_to_openai_message_param(msg) for msg in request.body["messages"]]
|
request.body["messages"] = [convert_to_openai_message_param(msg) for msg in request.body["messages"]]
|
||||||
chat_params = OpenAIChatCompletionRequest(**request.body)
|
chat_params = OpenAIChatCompletionRequestWithExtraBody(**request.body)
|
||||||
chat_response = await self.inference_api.openai_chat_completion(chat_params)
|
chat_response = await self.inference_api.openai_chat_completion(chat_params)
|
||||||
|
|
||||||
# this is for mypy, we don't allow streaming so we'll get the right type
|
# this is for mypy, we don't allow streaming so we'll get the right type
|
||||||
|
@ -623,7 +623,7 @@ class ReferenceBatchesImpl(Batches):
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
elif request.url == "/v1/completions":
|
elif request.url == "/v1/completions":
|
||||||
completion_params = OpenAICompletionRequest(**request.body)
|
completion_params = OpenAICompletionRequestWithExtraBody(**request.body)
|
||||||
completion_response = await self.inference_api.openai_completion(completion_params)
|
completion_response = await self.inference_api.openai_completion(completion_params)
|
||||||
|
|
||||||
# this is for mypy, we don't allow streaming so we'll get the right type
|
# this is for mypy, we don't allow streaming so we'll get the right type
|
||||||
|
|
|
@ -14,8 +14,8 @@ from llama_stack.apis.datasetio import DatasetIO
|
||||||
from llama_stack.apis.datasets import Datasets
|
from llama_stack.apis.datasets import Datasets
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import (
|
||||||
Inference,
|
Inference,
|
||||||
OpenAIChatCompletionRequest,
|
OpenAIChatCompletionRequestWithExtraBody,
|
||||||
OpenAICompletionRequest,
|
OpenAICompletionRequestWithExtraBody,
|
||||||
OpenAISystemMessageParam,
|
OpenAISystemMessageParam,
|
||||||
OpenAIUserMessageParam,
|
OpenAIUserMessageParam,
|
||||||
UserMessage,
|
UserMessage,
|
||||||
|
@ -175,7 +175,7 @@ class MetaReferenceEvalImpl(
|
||||||
sampling_params["stop"] = candidate.sampling_params.stop
|
sampling_params["stop"] = candidate.sampling_params.stop
|
||||||
|
|
||||||
input_content = json.loads(x[ColumnName.completion_input.value])
|
input_content = json.loads(x[ColumnName.completion_input.value])
|
||||||
params = OpenAICompletionRequest(
|
params = OpenAICompletionRequestWithExtraBody(
|
||||||
model=candidate.model,
|
model=candidate.model,
|
||||||
prompt=input_content,
|
prompt=input_content,
|
||||||
**sampling_params,
|
**sampling_params,
|
||||||
|
@ -195,7 +195,7 @@ class MetaReferenceEvalImpl(
|
||||||
messages += [OpenAISystemMessageParam(**x) for x in chat_completion_input_json if x["role"] == "system"]
|
messages += [OpenAISystemMessageParam(**x) for x in chat_completion_input_json if x["role"] == "system"]
|
||||||
|
|
||||||
messages += input_messages
|
messages += input_messages
|
||||||
params = OpenAIChatCompletionRequest(
|
params = OpenAIChatCompletionRequestWithExtraBody(
|
||||||
model=candidate.model,
|
model=candidate.model,
|
||||||
messages=messages,
|
messages=messages,
|
||||||
**sampling_params,
|
**sampling_params,
|
||||||
|
|
|
@ -9,8 +9,8 @@ from collections.abc import AsyncIterator
|
||||||
|
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import (
|
||||||
InferenceProvider,
|
InferenceProvider,
|
||||||
OpenAIChatCompletionRequest,
|
OpenAIChatCompletionRequestWithExtraBody,
|
||||||
OpenAICompletionRequest,
|
OpenAICompletionRequestWithExtraBody,
|
||||||
)
|
)
|
||||||
from llama_stack.apis.inference.inference import (
|
from llama_stack.apis.inference.inference import (
|
||||||
OpenAIChatCompletion,
|
OpenAIChatCompletion,
|
||||||
|
@ -67,7 +67,7 @@ class MetaReferenceInferenceImpl(
|
||||||
|
|
||||||
async def openai_completion(
|
async def openai_completion(
|
||||||
self,
|
self,
|
||||||
params: OpenAICompletionRequest,
|
params: OpenAICompletionRequestWithExtraBody,
|
||||||
) -> OpenAICompletion:
|
) -> OpenAICompletion:
|
||||||
raise NotImplementedError("OpenAI completion not supported by meta reference provider")
|
raise NotImplementedError("OpenAI completion not supported by meta reference provider")
|
||||||
|
|
||||||
|
@ -153,6 +153,6 @@ class MetaReferenceInferenceImpl(
|
||||||
|
|
||||||
async def openai_chat_completion(
|
async def openai_chat_completion(
|
||||||
self,
|
self,
|
||||||
params: OpenAIChatCompletionRequest,
|
params: OpenAIChatCompletionRequestWithExtraBody,
|
||||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||||
raise NotImplementedError("OpenAI chat completion not supported by meta-reference inference provider")
|
raise NotImplementedError("OpenAI chat completion not supported by meta-reference inference provider")
|
||||||
|
|
|
@ -8,8 +8,8 @@ from collections.abc import AsyncIterator
|
||||||
|
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import (
|
||||||
InferenceProvider,
|
InferenceProvider,
|
||||||
OpenAIChatCompletionRequest,
|
OpenAIChatCompletionRequestWithExtraBody,
|
||||||
OpenAICompletionRequest,
|
OpenAICompletionRequestWithExtraBody,
|
||||||
)
|
)
|
||||||
from llama_stack.apis.inference.inference import (
|
from llama_stack.apis.inference.inference import (
|
||||||
OpenAIChatCompletion,
|
OpenAIChatCompletion,
|
||||||
|
@ -72,12 +72,12 @@ class SentenceTransformersInferenceImpl(
|
||||||
|
|
||||||
async def openai_completion(
|
async def openai_completion(
|
||||||
self,
|
self,
|
||||||
params: OpenAICompletionRequest,
|
params: OpenAICompletionRequestWithExtraBody,
|
||||||
) -> OpenAICompletion:
|
) -> OpenAICompletion:
|
||||||
raise NotImplementedError("OpenAI completion not supported by sentence transformers provider")
|
raise NotImplementedError("OpenAI completion not supported by sentence transformers provider")
|
||||||
|
|
||||||
async def openai_chat_completion(
|
async def openai_chat_completion(
|
||||||
self,
|
self,
|
||||||
params: OpenAIChatCompletionRequest,
|
params: OpenAIChatCompletionRequestWithExtraBody,
|
||||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||||
raise NotImplementedError("OpenAI chat completion not supported by sentence transformers provider")
|
raise NotImplementedError("OpenAI chat completion not supported by sentence transformers provider")
|
||||||
|
|
|
@ -13,7 +13,7 @@ from llama_stack.apis.common.content_types import ImageContentItem, TextContentI
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import (
|
||||||
Inference,
|
Inference,
|
||||||
Message,
|
Message,
|
||||||
OpenAIChatCompletionRequest,
|
OpenAIChatCompletionRequestWithExtraBody,
|
||||||
OpenAIUserMessageParam,
|
OpenAIUserMessageParam,
|
||||||
UserMessage,
|
UserMessage,
|
||||||
)
|
)
|
||||||
|
@ -296,7 +296,7 @@ class LlamaGuardShield:
|
||||||
else:
|
else:
|
||||||
shield_input_message = self.build_text_shield_input(messages)
|
shield_input_message = self.build_text_shield_input(messages)
|
||||||
|
|
||||||
params = OpenAIChatCompletionRequest(
|
params = OpenAIChatCompletionRequestWithExtraBody(
|
||||||
model=self.model,
|
model=self.model,
|
||||||
messages=[shield_input_message],
|
messages=[shield_input_message],
|
||||||
stream=False,
|
stream=False,
|
||||||
|
@ -384,7 +384,7 @@ class LlamaGuardShield:
|
||||||
# TODO: Add Image based support for OpenAI Moderations
|
# TODO: Add Image based support for OpenAI Moderations
|
||||||
shield_input_message = self.build_text_shield_input(messages)
|
shield_input_message = self.build_text_shield_input(messages)
|
||||||
|
|
||||||
params = OpenAIChatCompletionRequest(
|
params = OpenAIChatCompletionRequestWithExtraBody(
|
||||||
model=self.model,
|
model=self.model,
|
||||||
messages=[shield_input_message],
|
messages=[shield_input_message],
|
||||||
stream=False,
|
stream=False,
|
||||||
|
|
|
@ -6,7 +6,7 @@
|
||||||
import re
|
import re
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
from llama_stack.apis.inference import Inference, OpenAIChatCompletionRequest
|
from llama_stack.apis.inference import Inference, OpenAIChatCompletionRequestWithExtraBody
|
||||||
from llama_stack.apis.scoring import ScoringResultRow
|
from llama_stack.apis.scoring import ScoringResultRow
|
||||||
from llama_stack.apis.scoring_functions import ScoringFnParams
|
from llama_stack.apis.scoring_functions import ScoringFnParams
|
||||||
from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
|
from llama_stack.providers.utils.scoring.base_scoring_fn import RegisteredBaseScoringFn
|
||||||
|
@ -55,7 +55,7 @@ class LlmAsJudgeScoringFn(RegisteredBaseScoringFn):
|
||||||
generated_answer=generated_answer,
|
generated_answer=generated_answer,
|
||||||
)
|
)
|
||||||
|
|
||||||
params = OpenAIChatCompletionRequest(
|
params = OpenAIChatCompletionRequestWithExtraBody(
|
||||||
model=fn_def.params.judge_model,
|
model=fn_def.params.judge_model,
|
||||||
messages=[
|
messages=[
|
||||||
{
|
{
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
from jinja2 import Template
|
from jinja2 import Template
|
||||||
|
|
||||||
from llama_stack.apis.common.content_types import InterleavedContent
|
from llama_stack.apis.common.content_types import InterleavedContent
|
||||||
from llama_stack.apis.inference import OpenAIChatCompletionRequest, OpenAIUserMessageParam
|
from llama_stack.apis.inference import OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
|
||||||
from llama_stack.apis.tools.rag_tool import (
|
from llama_stack.apis.tools.rag_tool import (
|
||||||
DefaultRAGQueryGeneratorConfig,
|
DefaultRAGQueryGeneratorConfig,
|
||||||
LLMRAGQueryGeneratorConfig,
|
LLMRAGQueryGeneratorConfig,
|
||||||
|
@ -65,7 +65,7 @@ async def llm_rag_query_generator(
|
||||||
|
|
||||||
model = config.model
|
model = config.model
|
||||||
message = OpenAIUserMessageParam(content=rendered_content)
|
message = OpenAIUserMessageParam(content=rendered_content)
|
||||||
params = OpenAIChatCompletionRequest(
|
params = OpenAIChatCompletionRequestWithExtraBody(
|
||||||
model=model,
|
model=model,
|
||||||
messages=[message],
|
messages=[message],
|
||||||
stream=False,
|
stream=False,
|
||||||
|
|
|
@ -12,8 +12,8 @@ from botocore.client import BaseClient
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import (
|
||||||
ChatCompletionRequest,
|
ChatCompletionRequest,
|
||||||
Inference,
|
Inference,
|
||||||
OpenAIChatCompletionRequest,
|
OpenAIChatCompletionRequestWithExtraBody,
|
||||||
OpenAICompletionRequest,
|
OpenAICompletionRequestWithExtraBody,
|
||||||
OpenAIEmbeddingsResponse,
|
OpenAIEmbeddingsResponse,
|
||||||
)
|
)
|
||||||
from llama_stack.apis.inference.inference import (
|
from llama_stack.apis.inference.inference import (
|
||||||
|
@ -134,12 +134,12 @@ class BedrockInferenceAdapter(
|
||||||
|
|
||||||
async def openai_completion(
|
async def openai_completion(
|
||||||
self,
|
self,
|
||||||
params: OpenAICompletionRequest,
|
params: OpenAICompletionRequestWithExtraBody,
|
||||||
) -> OpenAICompletion:
|
) -> OpenAICompletion:
|
||||||
raise NotImplementedError("OpenAI completion not supported by the Bedrock provider")
|
raise NotImplementedError("OpenAI completion not supported by the Bedrock provider")
|
||||||
|
|
||||||
async def openai_chat_completion(
|
async def openai_chat_completion(
|
||||||
self,
|
self,
|
||||||
params: OpenAIChatCompletionRequest,
|
params: OpenAIChatCompletionRequestWithExtraBody,
|
||||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||||
raise NotImplementedError("OpenAI chat completion not supported by the Bedrock provider")
|
raise NotImplementedError("OpenAI chat completion not supported by the Bedrock provider")
|
||||||
|
|
|
@ -8,7 +8,7 @@ from collections.abc import Iterable
|
||||||
|
|
||||||
from databricks.sdk import WorkspaceClient
|
from databricks.sdk import WorkspaceClient
|
||||||
|
|
||||||
from llama_stack.apis.inference import OpenAICompletion, OpenAICompletionRequest
|
from llama_stack.apis.inference import OpenAICompletion, OpenAICompletionRequestWithExtraBody
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
|
|
||||||
|
@ -39,6 +39,6 @@ class DatabricksInferenceAdapter(OpenAIMixin):
|
||||||
|
|
||||||
async def openai_completion(
|
async def openai_completion(
|
||||||
self,
|
self,
|
||||||
params: OpenAICompletionRequest,
|
params: OpenAICompletionRequestWithExtraBody,
|
||||||
) -> OpenAICompletion:
|
) -> OpenAICompletion:
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
|
@ -3,7 +3,12 @@
|
||||||
#
|
#
|
||||||
# This source code is licensed under the terms described in the LICENSE file in
|
# This source code is licensed under the terms described in the LICENSE file in
|
||||||
# the root directory of this source tree.
|
# the root directory of this source tree.
|
||||||
from llama_stack.apis.inference.inference import OpenAICompletion, OpenAICompletionRequest, OpenAIEmbeddingsResponse
|
|
||||||
|
from llama_stack.apis.inference.inference import (
|
||||||
|
OpenAICompletion,
|
||||||
|
OpenAICompletionRequestWithExtraBody,
|
||||||
|
OpenAIEmbeddingsResponse,
|
||||||
|
)
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
|
from llama_stack.providers.remote.inference.llama_openai_compat.config import LlamaCompatConfig
|
||||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
|
@ -29,7 +34,7 @@ class LlamaCompatInferenceAdapter(OpenAIMixin):
|
||||||
|
|
||||||
async def openai_completion(
|
async def openai_completion(
|
||||||
self,
|
self,
|
||||||
params: OpenAICompletionRequest,
|
params: OpenAICompletionRequestWithExtraBody,
|
||||||
) -> OpenAICompletion:
|
) -> OpenAICompletion:
|
||||||
raise NotImplementedError()
|
raise NotImplementedError()
|
||||||
|
|
||||||
|
|
|
@ -13,9 +13,9 @@ from llama_stack.apis.inference import (
|
||||||
Inference,
|
Inference,
|
||||||
OpenAIChatCompletion,
|
OpenAIChatCompletion,
|
||||||
OpenAIChatCompletionChunk,
|
OpenAIChatCompletionChunk,
|
||||||
OpenAIChatCompletionRequest,
|
OpenAIChatCompletionRequestWithExtraBody,
|
||||||
OpenAICompletion,
|
OpenAICompletion,
|
||||||
OpenAICompletionRequest,
|
OpenAICompletionRequestWithExtraBody,
|
||||||
OpenAIEmbeddingsResponse,
|
OpenAIEmbeddingsResponse,
|
||||||
)
|
)
|
||||||
from llama_stack.apis.models import Model
|
from llama_stack.apis.models import Model
|
||||||
|
@ -79,7 +79,7 @@ class PassthroughInferenceAdapter(Inference):
|
||||||
|
|
||||||
async def openai_completion(
|
async def openai_completion(
|
||||||
self,
|
self,
|
||||||
params: OpenAICompletionRequest,
|
params: OpenAICompletionRequestWithExtraBody,
|
||||||
) -> OpenAICompletion:
|
) -> OpenAICompletion:
|
||||||
client = self._get_client()
|
client = self._get_client()
|
||||||
model_obj = await self.model_store.get_model(params.model)
|
model_obj = await self.model_store.get_model(params.model)
|
||||||
|
@ -93,7 +93,7 @@ class PassthroughInferenceAdapter(Inference):
|
||||||
|
|
||||||
async def openai_chat_completion(
|
async def openai_chat_completion(
|
||||||
self,
|
self,
|
||||||
params: OpenAIChatCompletionRequest,
|
params: OpenAIChatCompletionRequestWithExtraBody,
|
||||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||||
client = self._get_client()
|
client = self._get_client()
|
||||||
model_obj = await self.model_store.get_model(params.model)
|
model_obj = await self.model_store.get_model(params.model)
|
||||||
|
|
|
@ -9,7 +9,7 @@ from collections.abc import AsyncIterator
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import (
|
||||||
OpenAIChatCompletion,
|
OpenAIChatCompletion,
|
||||||
OpenAIChatCompletionChunk,
|
OpenAIChatCompletionChunk,
|
||||||
OpenAIChatCompletionRequest,
|
OpenAIChatCompletionRequestWithExtraBody,
|
||||||
)
|
)
|
||||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||||
|
|
||||||
|
@ -31,7 +31,7 @@ class RunpodInferenceAdapter(OpenAIMixin):
|
||||||
|
|
||||||
async def openai_chat_completion(
|
async def openai_chat_completion(
|
||||||
self,
|
self,
|
||||||
params: OpenAIChatCompletionRequest,
|
params: OpenAIChatCompletionRequestWithExtraBody,
|
||||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||||
"""Override to add RunPod-specific stream_options requirement."""
|
"""Override to add RunPod-specific stream_options requirement."""
|
||||||
params = params.model_copy()
|
params = params.model_copy()
|
||||||
|
|
|
@ -14,7 +14,7 @@ from pydantic import ConfigDict
|
||||||
|
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import (
|
||||||
OpenAIChatCompletion,
|
OpenAIChatCompletion,
|
||||||
OpenAIChatCompletionRequest,
|
OpenAIChatCompletionRequestWithExtraBody,
|
||||||
ToolChoice,
|
ToolChoice,
|
||||||
)
|
)
|
||||||
from llama_stack.log import get_logger
|
from llama_stack.log import get_logger
|
||||||
|
@ -93,7 +93,7 @@ class VLLMInferenceAdapter(OpenAIMixin):
|
||||||
|
|
||||||
async def openai_chat_completion(
|
async def openai_chat_completion(
|
||||||
self,
|
self,
|
||||||
params: OpenAIChatCompletionRequest,
|
params: OpenAIChatCompletionRequestWithExtraBody,
|
||||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||||
params = params.model_copy()
|
params = params.model_copy()
|
||||||
|
|
||||||
|
|
|
@ -16,9 +16,9 @@ from llama_stack.apis.inference import (
|
||||||
JsonSchemaResponseFormat,
|
JsonSchemaResponseFormat,
|
||||||
OpenAIChatCompletion,
|
OpenAIChatCompletion,
|
||||||
OpenAIChatCompletionChunk,
|
OpenAIChatCompletionChunk,
|
||||||
OpenAIChatCompletionRequest,
|
OpenAIChatCompletionRequestWithExtraBody,
|
||||||
OpenAICompletion,
|
OpenAICompletion,
|
||||||
OpenAICompletionRequest,
|
OpenAICompletionRequestWithExtraBody,
|
||||||
OpenAIEmbeddingData,
|
OpenAIEmbeddingData,
|
||||||
OpenAIEmbeddingsResponse,
|
OpenAIEmbeddingsResponse,
|
||||||
OpenAIEmbeddingUsage,
|
OpenAIEmbeddingUsage,
|
||||||
|
@ -226,7 +226,7 @@ class LiteLLMOpenAIMixin(
|
||||||
|
|
||||||
async def openai_completion(
|
async def openai_completion(
|
||||||
self,
|
self,
|
||||||
params: OpenAICompletionRequest,
|
params: OpenAICompletionRequestWithExtraBody,
|
||||||
) -> OpenAICompletion:
|
) -> OpenAICompletion:
|
||||||
model_obj = await self.model_store.get_model(params.model)
|
model_obj = await self.model_store.get_model(params.model)
|
||||||
|
|
||||||
|
@ -248,8 +248,6 @@ class LiteLLMOpenAIMixin(
|
||||||
temperature=params.temperature,
|
temperature=params.temperature,
|
||||||
top_p=params.top_p,
|
top_p=params.top_p,
|
||||||
user=params.user,
|
user=params.user,
|
||||||
guided_choice=params.guided_choice,
|
|
||||||
prompt_logprobs=params.prompt_logprobs,
|
|
||||||
suffix=params.suffix,
|
suffix=params.suffix,
|
||||||
api_key=self.get_api_key(),
|
api_key=self.get_api_key(),
|
||||||
api_base=self.api_base,
|
api_base=self.api_base,
|
||||||
|
@ -258,7 +256,7 @@ class LiteLLMOpenAIMixin(
|
||||||
|
|
||||||
async def openai_chat_completion(
|
async def openai_chat_completion(
|
||||||
self,
|
self,
|
||||||
params: OpenAIChatCompletionRequest,
|
params: OpenAIChatCompletionRequestWithExtraBody,
|
||||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||||
# Add usage tracking for streaming when telemetry is active
|
# Add usage tracking for streaming when telemetry is active
|
||||||
from llama_stack.providers.utils.telemetry.tracing import get_current_span
|
from llama_stack.providers.utils.telemetry.tracing import get_current_span
|
||||||
|
|
|
@ -17,9 +17,9 @@ from llama_stack.apis.inference import (
|
||||||
Model,
|
Model,
|
||||||
OpenAIChatCompletion,
|
OpenAIChatCompletion,
|
||||||
OpenAIChatCompletionChunk,
|
OpenAIChatCompletionChunk,
|
||||||
OpenAIChatCompletionRequest,
|
OpenAIChatCompletionRequestWithExtraBody,
|
||||||
OpenAICompletion,
|
OpenAICompletion,
|
||||||
OpenAICompletionRequest,
|
OpenAICompletionRequestWithExtraBody,
|
||||||
OpenAIEmbeddingData,
|
OpenAIEmbeddingData,
|
||||||
OpenAIEmbeddingsResponse,
|
OpenAIEmbeddingsResponse,
|
||||||
OpenAIEmbeddingUsage,
|
OpenAIEmbeddingUsage,
|
||||||
|
@ -223,21 +223,11 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
|
||||||
|
|
||||||
async def openai_completion(
|
async def openai_completion(
|
||||||
self,
|
self,
|
||||||
params: OpenAICompletionRequest,
|
params: OpenAICompletionRequestWithExtraBody,
|
||||||
) -> OpenAICompletion:
|
) -> OpenAICompletion:
|
||||||
"""
|
"""
|
||||||
Direct OpenAI completion API call.
|
Direct OpenAI completion API call.
|
||||||
"""
|
"""
|
||||||
# Handle parameters that are not supported by OpenAI API, but may be by the provider
|
|
||||||
# prompt_logprobs is supported by vLLM
|
|
||||||
# guided_choice is supported by vLLM
|
|
||||||
# TODO: test coverage
|
|
||||||
extra_body: dict[str, Any] = {}
|
|
||||||
if params.prompt_logprobs is not None and params.prompt_logprobs >= 0:
|
|
||||||
extra_body["prompt_logprobs"] = params.prompt_logprobs
|
|
||||||
if params.guided_choice:
|
|
||||||
extra_body["guided_choice"] = params.guided_choice
|
|
||||||
|
|
||||||
# TODO: fix openai_completion to return type compatible with OpenAI's API response
|
# TODO: fix openai_completion to return type compatible with OpenAI's API response
|
||||||
completion_kwargs = await prepare_openai_completion_params(
|
completion_kwargs = await prepare_openai_completion_params(
|
||||||
model=await self._get_provider_model_id(params.model),
|
model=await self._get_provider_model_id(params.model),
|
||||||
|
@ -259,13 +249,15 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
|
||||||
user=params.user,
|
user=params.user,
|
||||||
suffix=params.suffix,
|
suffix=params.suffix,
|
||||||
)
|
)
|
||||||
resp = await self.client.completions.create(**completion_kwargs, extra_body=extra_body)
|
if extra_body := params.model_extra:
|
||||||
|
completion_kwargs["extra_body"] = extra_body
|
||||||
|
resp = await self.client.completions.create(**completion_kwargs)
|
||||||
|
|
||||||
return await self._maybe_overwrite_id(resp, params.stream) # type: ignore[no-any-return]
|
return await self._maybe_overwrite_id(resp, params.stream) # type: ignore[no-any-return]
|
||||||
|
|
||||||
async def openai_chat_completion(
|
async def openai_chat_completion(
|
||||||
self,
|
self,
|
||||||
params: OpenAIChatCompletionRequest,
|
params: OpenAIChatCompletionRequestWithExtraBody,
|
||||||
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
) -> OpenAIChatCompletion | AsyncIterator[OpenAIChatCompletionChunk]:
|
||||||
"""
|
"""
|
||||||
Direct OpenAI chat completion API call.
|
Direct OpenAI chat completion API call.
|
||||||
|
@ -316,6 +308,8 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
|
||||||
user=params.user,
|
user=params.user,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if extra_body := params.model_extra:
|
||||||
|
request_params["extra_body"] = extra_body
|
||||||
resp = await self.client.chat.completions.create(**request_params)
|
resp = await self.client.chat.completions.create(**request_params)
|
||||||
|
|
||||||
return await self._maybe_overwrite_id(resp, params.stream) # type: ignore[no-any-return]
|
return await self._maybe_overwrite_id(resp, params.stream) # type: ignore[no-any-return]
|
||||||
|
|
|
@ -0,0 +1,881 @@
|
||||||
|
{
|
||||||
|
"test_id": null,
|
||||||
|
"request": {
|
||||||
|
"method": "POST",
|
||||||
|
"url": "https://api.openai.com/v1/v1/models",
|
||||||
|
"headers": {},
|
||||||
|
"body": {},
|
||||||
|
"endpoint": "/v1/models",
|
||||||
|
"model": ""
|
||||||
|
},
|
||||||
|
"response": {
|
||||||
|
"body": [
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4-0613",
|
||||||
|
"created": 1686588896,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "openai"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4",
|
||||||
|
"created": 1687882411,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "openai"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-3.5-turbo",
|
||||||
|
"created": 1677610602,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "openai"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "sora-2-pro",
|
||||||
|
"created": 1759708663,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-audio-mini-2025-10-06",
|
||||||
|
"created": 1759512137,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-realtime-mini",
|
||||||
|
"created": 1759517133,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-realtime-mini-2025-10-06",
|
||||||
|
"created": 1759517175,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "sora-2",
|
||||||
|
"created": 1759708615,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "davinci-002",
|
||||||
|
"created": 1692634301,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "babbage-002",
|
||||||
|
"created": 1692634615,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-3.5-turbo-instruct",
|
||||||
|
"created": 1692901427,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-3.5-turbo-instruct-0914",
|
||||||
|
"created": 1694122472,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "dall-e-3",
|
||||||
|
"created": 1698785189,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "dall-e-2",
|
||||||
|
"created": 1698798177,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4-1106-preview",
|
||||||
|
"created": 1698957206,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-3.5-turbo-1106",
|
||||||
|
"created": 1698959748,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "tts-1-hd",
|
||||||
|
"created": 1699046015,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "tts-1-1106",
|
||||||
|
"created": 1699053241,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "tts-1-hd-1106",
|
||||||
|
"created": 1699053533,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "text-embedding-3-small",
|
||||||
|
"created": 1705948997,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "text-embedding-3-large",
|
||||||
|
"created": 1705953180,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4-0125-preview",
|
||||||
|
"created": 1706037612,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4-turbo-preview",
|
||||||
|
"created": 1706037777,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-3.5-turbo-0125",
|
||||||
|
"created": 1706048358,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4-turbo",
|
||||||
|
"created": 1712361441,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4-turbo-2024-04-09",
|
||||||
|
"created": 1712601677,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o",
|
||||||
|
"created": 1715367049,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-2024-05-13",
|
||||||
|
"created": 1715368132,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-mini-2024-07-18",
|
||||||
|
"created": 1721172717,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-mini",
|
||||||
|
"created": 1721172741,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-2024-08-06",
|
||||||
|
"created": 1722814719,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "chatgpt-4o-latest",
|
||||||
|
"created": 1723515131,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "o1-mini-2024-09-12",
|
||||||
|
"created": 1725648979,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "o1-mini",
|
||||||
|
"created": 1725649008,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-realtime-preview-2024-10-01",
|
||||||
|
"created": 1727131766,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-audio-preview-2024-10-01",
|
||||||
|
"created": 1727389042,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-audio-preview",
|
||||||
|
"created": 1727460443,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-realtime-preview",
|
||||||
|
"created": 1727659998,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "omni-moderation-latest",
|
||||||
|
"created": 1731689265,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "omni-moderation-2024-09-26",
|
||||||
|
"created": 1732734466,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-realtime-preview-2024-12-17",
|
||||||
|
"created": 1733945430,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-audio-preview-2024-12-17",
|
||||||
|
"created": 1734034239,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-mini-realtime-preview-2024-12-17",
|
||||||
|
"created": 1734112601,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-mini-audio-preview-2024-12-17",
|
||||||
|
"created": 1734115920,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "o1-2024-12-17",
|
||||||
|
"created": 1734326976,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "o1",
|
||||||
|
"created": 1734375816,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-mini-realtime-preview",
|
||||||
|
"created": 1734387380,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-mini-audio-preview",
|
||||||
|
"created": 1734387424,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "o3-mini",
|
||||||
|
"created": 1737146383,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "o3-mini-2025-01-31",
|
||||||
|
"created": 1738010200,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-2024-11-20",
|
||||||
|
"created": 1739331543,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-search-preview-2025-03-11",
|
||||||
|
"created": 1741388170,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-search-preview",
|
||||||
|
"created": 1741388720,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-mini-search-preview-2025-03-11",
|
||||||
|
"created": 1741390858,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-mini-search-preview",
|
||||||
|
"created": 1741391161,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-transcribe",
|
||||||
|
"created": 1742068463,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-mini-transcribe",
|
||||||
|
"created": 1742068596,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "o1-pro-2025-03-19",
|
||||||
|
"created": 1742251504,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "o1-pro",
|
||||||
|
"created": 1742251791,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-mini-tts",
|
||||||
|
"created": 1742403959,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "o3-2025-04-16",
|
||||||
|
"created": 1744133301,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "o4-mini-2025-04-16",
|
||||||
|
"created": 1744133506,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "o3",
|
||||||
|
"created": 1744225308,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "o4-mini",
|
||||||
|
"created": 1744225351,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4.1-2025-04-14",
|
||||||
|
"created": 1744315746,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4.1",
|
||||||
|
"created": 1744316542,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4.1-mini-2025-04-14",
|
||||||
|
"created": 1744317547,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4.1-mini",
|
||||||
|
"created": 1744318173,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4.1-nano-2025-04-14",
|
||||||
|
"created": 1744321025,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4.1-nano",
|
||||||
|
"created": 1744321707,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-image-1",
|
||||||
|
"created": 1745517030,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "codex-mini-latest",
|
||||||
|
"created": 1746673257,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-realtime-preview-2025-06-03",
|
||||||
|
"created": 1748907838,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-4o-audio-preview-2025-06-03",
|
||||||
|
"created": 1748908498,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "o4-mini-deep-research",
|
||||||
|
"created": 1749685485,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "o4-mini-deep-research-2025-06-26",
|
||||||
|
"created": 1750866121,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-5-chat-latest",
|
||||||
|
"created": 1754073306,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-5-2025-08-07",
|
||||||
|
"created": 1754075360,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-5",
|
||||||
|
"created": 1754425777,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-5-mini-2025-08-07",
|
||||||
|
"created": 1754425867,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-5-mini",
|
||||||
|
"created": 1754425928,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-5-nano-2025-08-07",
|
||||||
|
"created": 1754426303,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-5-nano",
|
||||||
|
"created": 1754426384,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-audio-2025-08-28",
|
||||||
|
"created": 1756256146,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-realtime",
|
||||||
|
"created": 1756271701,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-realtime-2025-08-28",
|
||||||
|
"created": 1756271773,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-audio",
|
||||||
|
"created": 1756339249,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-5-codex",
|
||||||
|
"created": 1757527818,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-image-1-mini",
|
||||||
|
"created": 1758845821,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-5-pro-2025-10-06",
|
||||||
|
"created": 1759469707,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-5-pro",
|
||||||
|
"created": 1759469822,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-audio-mini",
|
||||||
|
"created": 1759512027,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "system"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "gpt-3.5-turbo-16k",
|
||||||
|
"created": 1683758102,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "openai-internal"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "tts-1",
|
||||||
|
"created": 1681940951,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "openai-internal"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "whisper-1",
|
||||||
|
"created": 1677532384,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "openai-internal"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "text-embedding-ada-002",
|
||||||
|
"created": 1671217299,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "openai-internal"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"is_streaming": false
|
||||||
|
},
|
||||||
|
"id_normalization_mapping": {}
|
||||||
|
}
|
|
@ -0,0 +1,80 @@
|
||||||
|
{
|
||||||
|
"test_id": null,
|
||||||
|
"request": {
|
||||||
|
"method": "POST",
|
||||||
|
"url": "http://0.0.0.0:11434/v1/v1/models",
|
||||||
|
"headers": {},
|
||||||
|
"body": {},
|
||||||
|
"endpoint": "/v1/models",
|
||||||
|
"model": ""
|
||||||
|
},
|
||||||
|
"response": {
|
||||||
|
"body": [
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "llama3.2-vision:11b",
|
||||||
|
"created": 1759959879,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "library"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "nomic-embed-text:latest",
|
||||||
|
"created": 1754610899,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "library"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "llama-guard3:1b",
|
||||||
|
"created": 1754088388,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "library"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "all-minilm:l6-v2",
|
||||||
|
"created": 1753826826,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "library"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "all-minilm:latest",
|
||||||
|
"created": 1749064003,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "library"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "llama3.1:8b-instruct-fp16",
|
||||||
|
"created": 1739575404,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "library"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "llama3.2:3b-instruct-fp16",
|
||||||
|
"created": 1737496003,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "library"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"is_streaming": false
|
||||||
|
},
|
||||||
|
"id_normalization_mapping": {}
|
||||||
|
}
|
|
@ -0,0 +1,45 @@
|
||||||
|
{
|
||||||
|
"test_id": null,
|
||||||
|
"request": {
|
||||||
|
"method": "POST",
|
||||||
|
"url": "http://localhost:8000/v1/v1/models",
|
||||||
|
"headers": {},
|
||||||
|
"body": {},
|
||||||
|
"endpoint": "/v1/models",
|
||||||
|
"model": ""
|
||||||
|
},
|
||||||
|
"response": {
|
||||||
|
"body": [
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "Qwen/Qwen3-0.6B",
|
||||||
|
"created": 1760135828,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "vllm",
|
||||||
|
"root": "Qwen/Qwen3-0.6B",
|
||||||
|
"parent": null,
|
||||||
|
"max_model_len": 4096,
|
||||||
|
"permission": [
|
||||||
|
{
|
||||||
|
"id": "modelperm-5119df1e8c3246148a1d43e60357e420",
|
||||||
|
"object": "model_permission",
|
||||||
|
"created": 1760135828,
|
||||||
|
"allow_create_engine": false,
|
||||||
|
"allow_sampling": true,
|
||||||
|
"allow_logprobs": true,
|
||||||
|
"allow_search_indices": false,
|
||||||
|
"allow_view": true,
|
||||||
|
"allow_fine_tuning": false,
|
||||||
|
"organization": "*",
|
||||||
|
"group": null,
|
||||||
|
"is_blocking": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"is_streaming": false
|
||||||
|
},
|
||||||
|
"id_normalization_mapping": {}
|
||||||
|
}
|
|
@ -0,0 +1,543 @@
|
||||||
|
{
|
||||||
|
"test_id": null,
|
||||||
|
"request": {
|
||||||
|
"method": "POST",
|
||||||
|
"url": "https://api.fireworks.ai/inference/v1/v1/models",
|
||||||
|
"headers": {},
|
||||||
|
"body": {},
|
||||||
|
"endpoint": "/v1/models",
|
||||||
|
"model": ""
|
||||||
|
},
|
||||||
|
"response": {
|
||||||
|
"body": [
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/flux-1-dev-fp8",
|
||||||
|
"created": 1729532889,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "FLUMINA_BASE_MODEL",
|
||||||
|
"supports_chat": false,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/flux-kontext-max",
|
||||||
|
"created": 1750714611,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "FLUMINA_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": true,
|
||||||
|
"supports_tools": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/flux-kontext-pro",
|
||||||
|
"created": 1750488264,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "FLUMINA_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": true,
|
||||||
|
"supports_tools": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/sentientfoundation-serverless/models/dobby-mini-unhinged-plus-llama-3-1-8b",
|
||||||
|
"created": 1748467427,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "sentientfoundation-serverless",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": false,
|
||||||
|
"context_length": 131072
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/sentientfoundation/models/dobby-unhinged-llama-3-3-70b-new",
|
||||||
|
"created": 1739563474,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "sentientfoundation",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": false,
|
||||||
|
"context_length": 131072
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/gpt-oss-120b",
|
||||||
|
"created": 1754345600,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": true,
|
||||||
|
"context_length": 131072
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/qwen3-235b-a22b-instruct-2507",
|
||||||
|
"created": 1753124424,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": true,
|
||||||
|
"context_length": 262144
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/qwen3-235b-a22b-thinking-2507",
|
||||||
|
"created": 1753455434,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": false,
|
||||||
|
"context_length": 262144
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/deepseek-v3-0324",
|
||||||
|
"created": 1742827220,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": true,
|
||||||
|
"context_length": 163840
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/kimi-k2-instruct",
|
||||||
|
"created": 1752259096,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": true,
|
||||||
|
"context_length": 131072
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/gpt-oss-20b",
|
||||||
|
"created": 1754345466,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": false,
|
||||||
|
"context_length": 131072
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/kimi-k2-instruct-0905",
|
||||||
|
"created": 1757018994,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": true,
|
||||||
|
"context_length": 262144
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/llama-v3p3-70b-instruct",
|
||||||
|
"created": 1733442103,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": false,
|
||||||
|
"context_length": 131072
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/qwen3-235b-a22b",
|
||||||
|
"created": 1745885249,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": true,
|
||||||
|
"context_length": 131072
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/glm-4p5-air",
|
||||||
|
"created": 1754089426,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": true,
|
||||||
|
"context_length": 131072
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/deepseek-v3p1",
|
||||||
|
"created": 1755758988,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": true,
|
||||||
|
"context_length": 163840
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/flux-1-schnell-fp8",
|
||||||
|
"created": 1729535376,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "FLUMINA_BASE_MODEL",
|
||||||
|
"supports_chat": false,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/llama-v3p1-405b-instruct",
|
||||||
|
"created": 1721428386,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": true,
|
||||||
|
"context_length": 131072
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/llama4-scout-instruct-basic",
|
||||||
|
"created": 1743878279,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": true,
|
||||||
|
"supports_tools": true,
|
||||||
|
"context_length": 1048576
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/qwen3-30b-a3b",
|
||||||
|
"created": 1745878133,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": true,
|
||||||
|
"context_length": 131072
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/llama-v3p1-70b-instruct",
|
||||||
|
"created": 1721287357,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": true,
|
||||||
|
"context_length": 131072
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/deepseek-r1-0528",
|
||||||
|
"created": 1748456377,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": true,
|
||||||
|
"context_length": 163840
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/mixtral-8x22b-instruct",
|
||||||
|
"created": 1713375508,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": true,
|
||||||
|
"context_length": 65536
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/llama4-maverick-instruct-basic",
|
||||||
|
"created": 1743878495,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": true,
|
||||||
|
"supports_tools": true,
|
||||||
|
"context_length": 1048576
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/qwen2p5-vl-32b-instruct",
|
||||||
|
"created": 1743392739,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": true,
|
||||||
|
"supports_tools": false,
|
||||||
|
"context_length": 128000
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/deepseek-v3p1-terminus",
|
||||||
|
"created": 1758586241,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": true,
|
||||||
|
"context_length": 163840
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/llama-v3p1-8b-instruct",
|
||||||
|
"created": 1721692808,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": false,
|
||||||
|
"context_length": 131072
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct",
|
||||||
|
"created": 1753211090,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": true,
|
||||||
|
"context_length": 262144
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/qwen3-30b-a3b-thinking-2507",
|
||||||
|
"created": 1753916446,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/qwen3-embedding-8b",
|
||||||
|
"created": 1755707090,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "EMBEDDING_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": false,
|
||||||
|
"context_length": 40960
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/qwen3-reranker-8b",
|
||||||
|
"created": 1759865045,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "EMBEDDING_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": false,
|
||||||
|
"context_length": 40960
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/glm-4p5",
|
||||||
|
"created": 1753809636,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": true,
|
||||||
|
"context_length": 131072
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/qwen3-coder-30b-a3b-instruct",
|
||||||
|
"created": 1754063588,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": false,
|
||||||
|
"context_length": 262144
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/deepseek-r1",
|
||||||
|
"created": 1737397673,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": false,
|
||||||
|
"context_length": 163840
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/deepseek-v3",
|
||||||
|
"created": 1735576668,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": true,
|
||||||
|
"context_length": 131072
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/deepseek-r1-basic",
|
||||||
|
"created": 1742306746,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": false,
|
||||||
|
"context_length": 163840
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/fireworks/models/qwen3-30b-a3b-instruct-2507",
|
||||||
|
"created": 1753808388,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "fireworks",
|
||||||
|
"kind": "HF_BASE_MODEL",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": false,
|
||||||
|
"context_length": 262144
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.model.Model",
|
||||||
|
"__data__": {
|
||||||
|
"id": "accounts/tvergho-87e44d/models/debatecards-70b-ft-3epoch-dpo-v2",
|
||||||
|
"created": 1743381121,
|
||||||
|
"object": "model",
|
||||||
|
"owned_by": "tvergho-87e44d",
|
||||||
|
"kind": "HF_PEFT_ADDON",
|
||||||
|
"supports_chat": true,
|
||||||
|
"supports_image_input": false,
|
||||||
|
"supports_tools": false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"is_streaming": false
|
||||||
|
},
|
||||||
|
"id_normalization_mapping": {}
|
||||||
|
}
|
|
@ -0,0 +1,991 @@
|
||||||
|
{
|
||||||
|
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_streaming[txt=ollama/llama3.2:3b-instruct-fp16-inference:completion:sanity]",
|
||||||
|
"request": {
|
||||||
|
"method": "POST",
|
||||||
|
"url": "http://0.0.0.0:11434/v1/v1/completions",
|
||||||
|
"headers": {},
|
||||||
|
"body": {
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"prompt": "Respond to this question and explain your answer. Complete the sentence using one word: Roses are red, violets are ",
|
||||||
|
"max_tokens": 50,
|
||||||
|
"stream": true
|
||||||
|
},
|
||||||
|
"endpoint": "/v1/completions",
|
||||||
|
"model": "llama3.2:3b-instruct-fp16"
|
||||||
|
},
|
||||||
|
"response": {
|
||||||
|
"body": [
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "blue"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": ".\n\n"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "The"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " classic"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " nursery"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " rhyme"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " goes"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": ":\n\n"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "R"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "oses"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " are"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " red"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": ",\n"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "V"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "io"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "lets"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " are"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " blue"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": ".\n"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "Sugar"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " is"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " sweet"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": ",\n"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "And"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " so"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " are"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " you"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": ".\n\n"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "This"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " completes"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " the"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " traditional"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " rhyme"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " with"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " the"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " second"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " line"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " being"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " \""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "vio"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "lets"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " are"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " blue"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "\","
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " which"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " has"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " been"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " a"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " ubiquitous"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": null,
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " and"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-8567635651a5",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": "length",
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": ""
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "llama3.2:3b-instruct-fp16",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": "fp_ollama",
|
||||||
|
"usage": null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"is_streaming": true
|
||||||
|
},
|
||||||
|
"id_normalization_mapping": {}
|
||||||
|
}
|
|
@ -0,0 +1,48 @@
|
||||||
|
{
|
||||||
|
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]",
|
||||||
|
"request": {
|
||||||
|
"method": "POST",
|
||||||
|
"url": "http://localhost:8000/v1/v1/completions",
|
||||||
|
"headers": {},
|
||||||
|
"body": {
|
||||||
|
"model": "Qwen/Qwen3-0.6B",
|
||||||
|
"prompt": "I am feeling really sad today.",
|
||||||
|
"stream": false
|
||||||
|
},
|
||||||
|
"endpoint": "/v1/completions",
|
||||||
|
"model": "Qwen/Qwen3-0.6B"
|
||||||
|
},
|
||||||
|
"response": {
|
||||||
|
"body": {
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-d2ba309413e8",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": "length",
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " I have been working on a project that I feel like I'm not doing well",
|
||||||
|
"stop_reason": null,
|
||||||
|
"prompt_logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "Qwen/Qwen3-0.6B",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": null,
|
||||||
|
"usage": {
|
||||||
|
"completion_tokens": 16,
|
||||||
|
"prompt_tokens": 7,
|
||||||
|
"total_tokens": 23,
|
||||||
|
"completion_tokens_details": null,
|
||||||
|
"prompt_tokens_details": null
|
||||||
|
},
|
||||||
|
"service_tier": null,
|
||||||
|
"kv_transfer_params": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"is_streaming": false
|
||||||
|
},
|
||||||
|
"id_normalization_mapping": {}
|
||||||
|
}
|
|
@ -0,0 +1,54 @@
|
||||||
|
{
|
||||||
|
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]",
|
||||||
|
"request": {
|
||||||
|
"method": "POST",
|
||||||
|
"url": "http://localhost:8000/v1/v1/completions",
|
||||||
|
"headers": {},
|
||||||
|
"body": {
|
||||||
|
"model": "Qwen/Qwen3-0.6B",
|
||||||
|
"prompt": "I am feeling really sad today.",
|
||||||
|
"stream": false,
|
||||||
|
"extra_body": {
|
||||||
|
"guided_choices": [
|
||||||
|
"joy",
|
||||||
|
"sadness"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"endpoint": "/v1/completions",
|
||||||
|
"model": "Qwen/Qwen3-0.6B"
|
||||||
|
},
|
||||||
|
"response": {
|
||||||
|
"body": {
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-e3727f6c749a",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": "length",
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": " I feel that I am not good enough, and I feel like I have no",
|
||||||
|
"stop_reason": null,
|
||||||
|
"prompt_logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "Qwen/Qwen3-0.6B",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": null,
|
||||||
|
"usage": {
|
||||||
|
"completion_tokens": 16,
|
||||||
|
"prompt_tokens": 7,
|
||||||
|
"total_tokens": 23,
|
||||||
|
"completion_tokens_details": null,
|
||||||
|
"prompt_tokens_details": null
|
||||||
|
},
|
||||||
|
"service_tier": null,
|
||||||
|
"kv_transfer_params": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"is_streaming": false
|
||||||
|
},
|
||||||
|
"id_normalization_mapping": {}
|
||||||
|
}
|
|
@ -0,0 +1,54 @@
|
||||||
|
{
|
||||||
|
"test_id": "tests/integration/inference/test_openai_completion.py::test_openai_completion_guided_choice[txt=vllm/Qwen/Qwen3-0.6B]",
|
||||||
|
"request": {
|
||||||
|
"method": "POST",
|
||||||
|
"url": "http://localhost:8000/v1/v1/completions",
|
||||||
|
"headers": {},
|
||||||
|
"body": {
|
||||||
|
"model": "Qwen/Qwen3-0.6B",
|
||||||
|
"prompt": "I am feeling really sad today.",
|
||||||
|
"stream": false,
|
||||||
|
"extra_body": {
|
||||||
|
"guided_choice": [
|
||||||
|
"joy",
|
||||||
|
"sadness"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"endpoint": "/v1/completions",
|
||||||
|
"model": "Qwen/Qwen3-0.6B"
|
||||||
|
},
|
||||||
|
"response": {
|
||||||
|
"body": {
|
||||||
|
"__type__": "openai.types.completion.Completion",
|
||||||
|
"__data__": {
|
||||||
|
"id": "rec-f02f1bfd75ad",
|
||||||
|
"choices": [
|
||||||
|
{
|
||||||
|
"finish_reason": "stop",
|
||||||
|
"index": 0,
|
||||||
|
"logprobs": null,
|
||||||
|
"text": "sadness",
|
||||||
|
"stop_reason": null,
|
||||||
|
"prompt_logprobs": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"created": 0,
|
||||||
|
"model": "Qwen/Qwen3-0.6B",
|
||||||
|
"object": "text_completion",
|
||||||
|
"system_fingerprint": null,
|
||||||
|
"usage": {
|
||||||
|
"completion_tokens": 3,
|
||||||
|
"prompt_tokens": 7,
|
||||||
|
"total_tokens": 10,
|
||||||
|
"completion_tokens_details": null,
|
||||||
|
"prompt_tokens_details": null
|
||||||
|
},
|
||||||
|
"service_tier": null,
|
||||||
|
"kv_transfer_params": null
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"is_streaming": false
|
||||||
|
},
|
||||||
|
"id_normalization_mapping": {}
|
||||||
|
}
|
|
@ -223,7 +223,7 @@ def test_openai_completion_guided_choice(llama_stack_client, client_with_models,
|
||||||
model=text_model_id,
|
model=text_model_id,
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
stream=False,
|
stream=False,
|
||||||
guided_choice=["joy", "sadness"],
|
extra_body={"guided_choice": ["joy", "sadness"]},
|
||||||
)
|
)
|
||||||
assert len(response.choices) > 0
|
assert len(response.choices) > 0
|
||||||
choice = response.choices[0]
|
choice = response.choices[0]
|
||||||
|
|
|
@ -33,7 +33,7 @@ from llama_stack.apis.agents.openai_responses import (
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import (
|
||||||
OpenAIAssistantMessageParam,
|
OpenAIAssistantMessageParam,
|
||||||
OpenAIChatCompletionContentPartTextParam,
|
OpenAIChatCompletionContentPartTextParam,
|
||||||
OpenAIChatCompletionRequest,
|
OpenAIChatCompletionRequestWithExtraBody,
|
||||||
OpenAIDeveloperMessageParam,
|
OpenAIDeveloperMessageParam,
|
||||||
OpenAIJSONSchema,
|
OpenAIJSONSchema,
|
||||||
OpenAIResponseFormatJSONObject,
|
OpenAIResponseFormatJSONObject,
|
||||||
|
@ -162,7 +162,7 @@ async def test_create_openai_response_with_string_input(openai_responses_impl, m
|
||||||
chunks = [chunk async for chunk in result]
|
chunks = [chunk async for chunk in result]
|
||||||
|
|
||||||
mock_inference_api.openai_chat_completion.assert_called_once_with(
|
mock_inference_api.openai_chat_completion.assert_called_once_with(
|
||||||
OpenAIChatCompletionRequest(
|
OpenAIChatCompletionRequestWithExtraBody(
|
||||||
model=model,
|
model=model,
|
||||||
messages=[OpenAIUserMessageParam(role="user", content="What is the capital of Ireland?", name=None)],
|
messages=[OpenAIUserMessageParam(role="user", content="What is the capital of Ireland?", name=None)],
|
||||||
response_format=None,
|
response_format=None,
|
||||||
|
|
|
@ -13,11 +13,16 @@ import pytest
|
||||||
from llama_stack.apis.inference import (
|
from llama_stack.apis.inference import (
|
||||||
OpenAIAssistantMessageParam,
|
OpenAIAssistantMessageParam,
|
||||||
OpenAIChatCompletion,
|
OpenAIChatCompletion,
|
||||||
OpenAIChatCompletionRequest,
|
OpenAIChatCompletionRequestWithExtraBody,
|
||||||
OpenAIChoice,
|
OpenAIChoice,
|
||||||
|
OpenAICompletion,
|
||||||
|
OpenAICompletionChoice,
|
||||||
|
OpenAICompletionRequestWithExtraBody,
|
||||||
ToolChoice,
|
ToolChoice,
|
||||||
)
|
)
|
||||||
from llama_stack.apis.models import Model
|
from llama_stack.apis.models import Model
|
||||||
|
from llama_stack.core.routers.inference import InferenceRouter
|
||||||
|
from llama_stack.core.routing_tables.models import ModelsRoutingTable
|
||||||
from llama_stack.providers.datatypes import HealthStatus
|
from llama_stack.providers.datatypes import HealthStatus
|
||||||
from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
|
from llama_stack.providers.remote.inference.vllm.config import VLLMInferenceAdapterConfig
|
||||||
from llama_stack.providers.remote.inference.vllm.vllm import VLLMInferenceAdapter
|
from llama_stack.providers.remote.inference.vllm.vllm import VLLMInferenceAdapter
|
||||||
|
@ -57,7 +62,7 @@ async def test_old_vllm_tool_choice(vllm_inference_adapter):
|
||||||
mock_client_property.return_value = mock_client
|
mock_client_property.return_value = mock_client
|
||||||
|
|
||||||
# No tools but auto tool choice
|
# No tools but auto tool choice
|
||||||
params = OpenAIChatCompletionRequest(
|
params = OpenAIChatCompletionRequestWithExtraBody(
|
||||||
model="mock-model",
|
model="mock-model",
|
||||||
messages=[{"role": "user", "content": "test"}],
|
messages=[{"role": "user", "content": "test"}],
|
||||||
stream=False,
|
stream=False,
|
||||||
|
@ -173,7 +178,7 @@ async def test_openai_chat_completion_is_async(vllm_inference_adapter):
|
||||||
)
|
)
|
||||||
|
|
||||||
async def do_inference():
|
async def do_inference():
|
||||||
params = OpenAIChatCompletionRequest(
|
params = OpenAIChatCompletionRequestWithExtraBody(
|
||||||
model="mock-model",
|
model="mock-model",
|
||||||
messages=[{"role": "user", "content": "one fish two fish"}],
|
messages=[{"role": "user", "content": "one fish two fish"}],
|
||||||
stream=False,
|
stream=False,
|
||||||
|
@ -191,3 +196,148 @@ async def test_openai_chat_completion_is_async(vllm_inference_adapter):
|
||||||
|
|
||||||
assert mock_create_client.call_count == 4 # no cheating
|
assert mock_create_client.call_count == 4 # no cheating
|
||||||
assert total_time < (sleep_time * 2), f"Total time taken: {total_time}s exceeded expected max"
|
assert total_time < (sleep_time * 2), f"Total time taken: {total_time}s exceeded expected max"
|
||||||
|
|
||||||
|
|
||||||
|
async def test_vllm_completion_extra_body():
|
||||||
|
"""
|
||||||
|
Test that vLLM-specific guided_choice and prompt_logprobs parameters are correctly forwarded
|
||||||
|
via extra_body to the underlying OpenAI client through the InferenceRouter.
|
||||||
|
"""
|
||||||
|
# Set up the vLLM adapter
|
||||||
|
config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
|
||||||
|
vllm_adapter = VLLMInferenceAdapter(config=config)
|
||||||
|
vllm_adapter.__provider_id__ = "vllm"
|
||||||
|
await vllm_adapter.initialize()
|
||||||
|
|
||||||
|
# Create a mock model store
|
||||||
|
mock_model_store = AsyncMock()
|
||||||
|
mock_model = Model(identifier="mock-model", provider_resource_id="mock-model", provider_id="vllm")
|
||||||
|
mock_model_store.get_model.return_value = mock_model
|
||||||
|
mock_model_store.has_model.return_value = True
|
||||||
|
|
||||||
|
# Create a mock dist_registry
|
||||||
|
mock_dist_registry = MagicMock()
|
||||||
|
mock_dist_registry.get = AsyncMock(return_value=mock_model)
|
||||||
|
mock_dist_registry.set = AsyncMock()
|
||||||
|
|
||||||
|
# Set up the routing table
|
||||||
|
routing_table = ModelsRoutingTable(
|
||||||
|
impls_by_provider_id={"vllm": vllm_adapter},
|
||||||
|
dist_registry=mock_dist_registry,
|
||||||
|
policy=[],
|
||||||
|
)
|
||||||
|
# Inject the model store into the adapter
|
||||||
|
vllm_adapter.model_store = routing_table
|
||||||
|
|
||||||
|
# Create the InferenceRouter
|
||||||
|
router = InferenceRouter(routing_table=routing_table)
|
||||||
|
|
||||||
|
# Patch the OpenAI client
|
||||||
|
with patch.object(VLLMInferenceAdapter, "client", new_callable=PropertyMock) as mock_client_property:
|
||||||
|
mock_client = MagicMock()
|
||||||
|
mock_client.completions.create = AsyncMock(
|
||||||
|
return_value=OpenAICompletion(
|
||||||
|
id="cmpl-abc123",
|
||||||
|
created=1,
|
||||||
|
model="mock-model",
|
||||||
|
choices=[
|
||||||
|
OpenAICompletionChoice(
|
||||||
|
text="joy",
|
||||||
|
finish_reason="stop",
|
||||||
|
index=0,
|
||||||
|
)
|
||||||
|
],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
mock_client_property.return_value = mock_client
|
||||||
|
|
||||||
|
# Test with guided_choice and prompt_logprobs as extra fields
|
||||||
|
params = OpenAICompletionRequestWithExtraBody(
|
||||||
|
model="mock-model",
|
||||||
|
prompt="I am feeling happy",
|
||||||
|
stream=False,
|
||||||
|
guided_choice=["joy", "sadness"],
|
||||||
|
prompt_logprobs=5,
|
||||||
|
)
|
||||||
|
await router.openai_completion(params)
|
||||||
|
|
||||||
|
# Verify that the client was called with extra_body containing both parameters
|
||||||
|
mock_client.completions.create.assert_called_once()
|
||||||
|
call_kwargs = mock_client.completions.create.call_args.kwargs
|
||||||
|
assert "extra_body" in call_kwargs
|
||||||
|
assert "guided_choice" in call_kwargs["extra_body"]
|
||||||
|
assert call_kwargs["extra_body"]["guided_choice"] == ["joy", "sadness"]
|
||||||
|
assert "prompt_logprobs" in call_kwargs["extra_body"]
|
||||||
|
assert call_kwargs["extra_body"]["prompt_logprobs"] == 5
|
||||||
|
|
||||||
|
|
||||||
|
async def test_vllm_chat_completion_extra_body():
|
||||||
|
"""
|
||||||
|
Test that vLLM-specific parameters (e.g., chat_template_kwargs) are correctly forwarded
|
||||||
|
via extra_body to the underlying OpenAI client through the InferenceRouter for chat completion.
|
||||||
|
"""
|
||||||
|
# Set up the vLLM adapter
|
||||||
|
config = VLLMInferenceAdapterConfig(url="http://mocked.localhost:12345")
|
||||||
|
vllm_adapter = VLLMInferenceAdapter(config=config)
|
||||||
|
vllm_adapter.__provider_id__ = "vllm"
|
||||||
|
await vllm_adapter.initialize()
|
||||||
|
|
||||||
|
# Create a mock model store
|
||||||
|
mock_model_store = AsyncMock()
|
||||||
|
mock_model = Model(identifier="mock-model", provider_resource_id="mock-model", provider_id="vllm")
|
||||||
|
mock_model_store.get_model.return_value = mock_model
|
||||||
|
mock_model_store.has_model.return_value = True
|
||||||
|
|
||||||
|
# Create a mock dist_registry
|
||||||
|
mock_dist_registry = MagicMock()
|
||||||
|
mock_dist_registry.get = AsyncMock(return_value=mock_model)
|
||||||
|
mock_dist_registry.set = AsyncMock()
|
||||||
|
|
||||||
|
# Set up the routing table
|
||||||
|
routing_table = ModelsRoutingTable(
|
||||||
|
impls_by_provider_id={"vllm": vllm_adapter},
|
||||||
|
dist_registry=mock_dist_registry,
|
||||||
|
policy=[],
|
||||||
|
)
|
||||||
|
# Inject the model store into the adapter
|
||||||
|
vllm_adapter.model_store = routing_table
|
||||||
|
|
||||||
|
# Create the InferenceRouter
|
||||||
|
router = InferenceRouter(routing_table=routing_table)
|
||||||
|
|
||||||
|
# Patch the OpenAI client
|
||||||
|
with patch.object(VLLMInferenceAdapter, "client", new_callable=PropertyMock) as mock_client_property:
|
||||||
|
mock_client = MagicMock()
|
||||||
|
mock_client.chat.completions.create = AsyncMock(
|
||||||
|
return_value=OpenAIChatCompletion(
|
||||||
|
id="chatcmpl-abc123",
|
||||||
|
created=1,
|
||||||
|
model="mock-model",
|
||||||
|
choices=[
|
||||||
|
OpenAIChoice(
|
||||||
|
message=OpenAIAssistantMessageParam(
|
||||||
|
content="test response",
|
||||||
|
),
|
||||||
|
finish_reason="stop",
|
||||||
|
index=0,
|
||||||
|
)
|
||||||
|
],
|
||||||
|
)
|
||||||
|
)
|
||||||
|
mock_client_property.return_value = mock_client
|
||||||
|
|
||||||
|
# Test with chat_template_kwargs as extra field
|
||||||
|
params = OpenAIChatCompletionRequestWithExtraBody(
|
||||||
|
model="mock-model",
|
||||||
|
messages=[{"role": "user", "content": "test"}],
|
||||||
|
stream=False,
|
||||||
|
chat_template_kwargs={"thinking": True},
|
||||||
|
)
|
||||||
|
await router.openai_chat_completion(params)
|
||||||
|
|
||||||
|
# Verify that the client was called with extra_body containing chat_template_kwargs
|
||||||
|
mock_client.chat.completions.create.assert_called_once()
|
||||||
|
call_kwargs = mock_client.chat.completions.create.call_args.kwargs
|
||||||
|
assert "extra_body" in call_kwargs
|
||||||
|
assert "chat_template_kwargs" in call_kwargs["extra_body"]
|
||||||
|
assert call_kwargs["extra_body"]["chat_template_kwargs"] == {"thinking": True}
|
||||||
|
|
|
@ -12,7 +12,7 @@ from unittest.mock import AsyncMock, MagicMock, Mock, PropertyMock, patch
|
||||||
import pytest
|
import pytest
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
|
|
||||||
from llama_stack.apis.inference import Model, OpenAIChatCompletionRequest, OpenAIUserMessageParam
|
from llama_stack.apis.inference import Model, OpenAIChatCompletionRequestWithExtraBody, OpenAIUserMessageParam
|
||||||
from llama_stack.apis.models import ModelType
|
from llama_stack.apis.models import ModelType
|
||||||
from llama_stack.core.request_headers import request_provider_data_context
|
from llama_stack.core.request_headers import request_provider_data_context
|
||||||
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
|
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
|
||||||
|
@ -271,7 +271,7 @@ class TestOpenAIMixinImagePreprocessing:
|
||||||
with patch("llama_stack.providers.utils.inference.openai_mixin.localize_image_content") as mock_localize:
|
with patch("llama_stack.providers.utils.inference.openai_mixin.localize_image_content") as mock_localize:
|
||||||
mock_localize.return_value = (b"fake_image_data", "jpeg")
|
mock_localize.return_value = (b"fake_image_data", "jpeg")
|
||||||
|
|
||||||
params = OpenAIChatCompletionRequest(model="test-model", messages=[message])
|
params = OpenAIChatCompletionRequestWithExtraBody(model="test-model", messages=[message])
|
||||||
await mixin.openai_chat_completion(params)
|
await mixin.openai_chat_completion(params)
|
||||||
|
|
||||||
mock_localize.assert_called_once_with("http://example.com/image.jpg")
|
mock_localize.assert_called_once_with("http://example.com/image.jpg")
|
||||||
|
@ -304,7 +304,7 @@ class TestOpenAIMixinImagePreprocessing:
|
||||||
|
|
||||||
with patch.object(type(mixin), "client", new_callable=PropertyMock, return_value=mock_client):
|
with patch.object(type(mixin), "client", new_callable=PropertyMock, return_value=mock_client):
|
||||||
with patch("llama_stack.providers.utils.inference.openai_mixin.localize_image_content") as mock_localize:
|
with patch("llama_stack.providers.utils.inference.openai_mixin.localize_image_content") as mock_localize:
|
||||||
params = OpenAIChatCompletionRequest(model="test-model", messages=[message])
|
params = OpenAIChatCompletionRequestWithExtraBody(model="test-model", messages=[message])
|
||||||
await mixin.openai_chat_completion(params)
|
await mixin.openai_chat_completion(params)
|
||||||
|
|
||||||
mock_localize.assert_not_called()
|
mock_localize.assert_not_called()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue