mirror of
https://github.com/meta-llama/llama-stack.git
synced 2025-12-28 04:51:59 +00:00
Merge remote-tracking branch 'refs/remotes/origin/feat/litellm_sambanova_usage' into feat/litellm_sambanova_usage
This commit is contained in:
commit
daf0c26420
86 changed files with 3494 additions and 835 deletions
598
docs/_static/llama-stack-spec.html
vendored
598
docs/_static/llama-stack-spec.html
vendored
|
|
@ -85,7 +85,7 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/batch-inference/chat-completion": {
|
||||
"/v1/inference/batch-chat-completion": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
|
|
@ -112,7 +112,7 @@
|
|||
}
|
||||
},
|
||||
"tags": [
|
||||
"BatchInference (Coming Soon)"
|
||||
"Inference"
|
||||
],
|
||||
"description": "",
|
||||
"parameters": [],
|
||||
|
|
@ -128,7 +128,7 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/batch-inference/completion": {
|
||||
"/v1/inference/batch-completion": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
|
|
@ -155,7 +155,7 @@
|
|||
}
|
||||
},
|
||||
"tags": [
|
||||
"BatchInference (Coming Soon)"
|
||||
"Inference"
|
||||
],
|
||||
"description": "",
|
||||
"parameters": [],
|
||||
|
|
@ -239,7 +239,7 @@
|
|||
}
|
||||
},
|
||||
"tags": [
|
||||
"Inference"
|
||||
"BatchInference (Coming Soon)"
|
||||
],
|
||||
"description": "Generate a chat completion for the given messages using the specified model.",
|
||||
"parameters": [],
|
||||
|
|
@ -287,7 +287,7 @@
|
|||
}
|
||||
},
|
||||
"tags": [
|
||||
"Inference"
|
||||
"BatchInference (Coming Soon)"
|
||||
],
|
||||
"description": "Generate a completion for the given content using the specified model.",
|
||||
"parameters": [],
|
||||
|
|
@ -3096,11 +3096,18 @@
|
|||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"description": "Response from an OpenAI-compatible chat completion request. **OR** Chunk from a streaming response to an OpenAI-compatible chat completion request.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletion"
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletion"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionChunk"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -4366,6 +4373,51 @@
|
|||
],
|
||||
"title": "ToolCall"
|
||||
},
|
||||
"ToolConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"tool_choice": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"auto",
|
||||
"required",
|
||||
"none"
|
||||
],
|
||||
"title": "ToolChoice",
|
||||
"description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model."
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
}
|
||||
],
|
||||
"default": "auto",
|
||||
"description": "(Optional) Whether tool use is automatic, required, or none. Can also specify a tool name to use a specific tool. Defaults to ToolChoice.auto."
|
||||
},
|
||||
"tool_prompt_format": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"json",
|
||||
"function_tag",
|
||||
"python_list"
|
||||
],
|
||||
"description": "(Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls."
|
||||
},
|
||||
"system_message_behavior": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"append",
|
||||
"replace"
|
||||
],
|
||||
"description": "(Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`: Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`: Replaces the default system prompt with the provided system message. The system message can include the string '{{function_definitions}}' to indicate where the function definitions should be inserted.",
|
||||
"default": "append"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "ToolConfig",
|
||||
"description": "Configuration for tool use."
|
||||
},
|
||||
"ToolDefinition": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -4554,7 +4606,7 @@
|
|||
"BatchChatCompletionRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"model": {
|
||||
"model_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"messages_batch": {
|
||||
|
|
@ -4575,25 +4627,8 @@
|
|||
"$ref": "#/components/schemas/ToolDefinition"
|
||||
}
|
||||
},
|
||||
"tool_choice": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"auto",
|
||||
"required",
|
||||
"none"
|
||||
],
|
||||
"title": "ToolChoice",
|
||||
"description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model."
|
||||
},
|
||||
"tool_prompt_format": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"json",
|
||||
"function_tag",
|
||||
"python_list"
|
||||
],
|
||||
"title": "ToolPromptFormat",
|
||||
"description": "Prompt format for calling custom / zero shot tools."
|
||||
"tool_config": {
|
||||
"$ref": "#/components/schemas/ToolConfig"
|
||||
},
|
||||
"response_format": {
|
||||
"$ref": "#/components/schemas/ResponseFormat"
|
||||
|
|
@ -4613,7 +4648,7 @@
|
|||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"model",
|
||||
"model_id",
|
||||
"messages_batch"
|
||||
],
|
||||
"title": "BatchChatCompletionRequest"
|
||||
|
|
@ -4710,7 +4745,7 @@
|
|||
"BatchCompletionRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"model": {
|
||||
"model_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"content_batch": {
|
||||
|
|
@ -4740,7 +4775,7 @@
|
|||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"model",
|
||||
"model_id",
|
||||
"content_batch"
|
||||
],
|
||||
"title": "BatchCompletionRequest"
|
||||
|
|
@ -4812,51 +4847,6 @@
|
|||
],
|
||||
"title": "CancelTrainingJobRequest"
|
||||
},
|
||||
"ToolConfig": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"tool_choice": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"auto",
|
||||
"required",
|
||||
"none"
|
||||
],
|
||||
"title": "ToolChoice",
|
||||
"description": "Whether tool use is required or automatic. This is a hint to the model which may not be followed. It depends on the Instruction Following capabilities of the model."
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
}
|
||||
],
|
||||
"default": "auto",
|
||||
"description": "(Optional) Whether tool use is automatic, required, or none. Can also specify a tool name to use a specific tool. Defaults to ToolChoice.auto."
|
||||
},
|
||||
"tool_prompt_format": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"json",
|
||||
"function_tag",
|
||||
"python_list"
|
||||
],
|
||||
"description": "(Optional) Instructs the model how to format tool calls. By default, Llama Stack will attempt to use a format that is best adapted to the model. - `ToolPromptFormat.json`: The tool calls are formatted as a JSON object. - `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name> tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python syntax -- a list of function calls."
|
||||
},
|
||||
"system_message_behavior": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"append",
|
||||
"replace"
|
||||
],
|
||||
"description": "(Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`: Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`: Replaces the default system prompt with the provided system message. The system message can include the string '{{function_definitions}}' to indicate where the function definitions should be inserted.",
|
||||
"default": "append"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "ToolConfig",
|
||||
"description": "Configuration for tool use."
|
||||
},
|
||||
"ChatCompletionRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -7906,7 +7896,13 @@
|
|||
"type": "object",
|
||||
"properties": {
|
||||
"status": {
|
||||
"type": "string"
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"OK",
|
||||
"Error",
|
||||
"Not Implemented"
|
||||
],
|
||||
"title": "HealthStatus"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
|
@ -8101,6 +8097,31 @@
|
|||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"health": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
|
@ -8108,7 +8129,8 @@
|
|||
"api",
|
||||
"provider_id",
|
||||
"provider_type",
|
||||
"config"
|
||||
"config",
|
||||
"health"
|
||||
],
|
||||
"title": "ProviderInfo"
|
||||
},
|
||||
|
|
@ -8842,7 +8864,17 @@
|
|||
"description": "Must be \"assistant\" to identify this as the model's response"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/components/schemas/InterleavedContent",
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "The content of the model's response"
|
||||
},
|
||||
"name": {
|
||||
|
|
@ -8852,9 +8884,9 @@
|
|||
"tool_calls": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/ToolCall"
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionToolCall"
|
||||
},
|
||||
"description": "List of tool calls. Each tool call is a ToolCall object."
|
||||
"description": "List of tool calls. Each tool call is an OpenAIChatCompletionToolCall object."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
|
@ -8865,6 +8897,98 @@
|
|||
"title": "OpenAIAssistantMessageParam",
|
||||
"description": "A message containing the model's (assistant) response in an OpenAI-compatible chat completion request."
|
||||
},
|
||||
"OpenAIChatCompletionContentPartImageParam": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "image_url",
|
||||
"default": "image_url"
|
||||
},
|
||||
"image_url": {
|
||||
"$ref": "#/components/schemas/OpenAIImageURL"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"image_url"
|
||||
],
|
||||
"title": "OpenAIChatCompletionContentPartImageParam"
|
||||
},
|
||||
"OpenAIChatCompletionContentPartParam": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartTextParam"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"text": "#/components/schemas/OpenAIChatCompletionContentPartTextParam",
|
||||
"image_url": "#/components/schemas/OpenAIChatCompletionContentPartImageParam"
|
||||
}
|
||||
}
|
||||
},
|
||||
"OpenAIChatCompletionContentPartTextParam": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "text",
|
||||
"default": "text"
|
||||
},
|
||||
"text": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"text"
|
||||
],
|
||||
"title": "OpenAIChatCompletionContentPartTextParam"
|
||||
},
|
||||
"OpenAIChatCompletionToolCall": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"index": {
|
||||
"type": "integer"
|
||||
},
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "function",
|
||||
"default": "function"
|
||||
},
|
||||
"function": {
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionToolCallFunction"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type"
|
||||
],
|
||||
"title": "OpenAIChatCompletionToolCall"
|
||||
},
|
||||
"OpenAIChatCompletionToolCallFunction": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"arguments": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "OpenAIChatCompletionToolCallFunction"
|
||||
},
|
||||
"OpenAIDeveloperMessageParam": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -8875,7 +8999,17 @@
|
|||
"description": "Must be \"developer\" to identify this as a developer message"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/components/schemas/InterleavedContent",
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "The content of the developer message"
|
||||
},
|
||||
"name": {
|
||||
|
|
@ -8891,6 +9025,66 @@
|
|||
"title": "OpenAIDeveloperMessageParam",
|
||||
"description": "A message from the developer in an OpenAI-compatible chat completion request."
|
||||
},
|
||||
"OpenAIImageURL": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"url": {
|
||||
"type": "string"
|
||||
},
|
||||
"detail": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"url"
|
||||
],
|
||||
"title": "OpenAIImageURL"
|
||||
},
|
||||
"OpenAIJSONSchema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"name": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": {
|
||||
"type": "string"
|
||||
},
|
||||
"strict": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "null"
|
||||
},
|
||||
{
|
||||
"type": "boolean"
|
||||
},
|
||||
{
|
||||
"type": "number"
|
||||
},
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array"
|
||||
},
|
||||
{
|
||||
"type": "object"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"name"
|
||||
],
|
||||
"title": "OpenAIJSONSchema"
|
||||
},
|
||||
"OpenAIMessageParam": {
|
||||
"oneOf": [
|
||||
{
|
||||
|
|
@ -8920,6 +9114,76 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"OpenAIResponseFormatJSONObject": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "json_object",
|
||||
"default": "json_object"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type"
|
||||
],
|
||||
"title": "OpenAIResponseFormatJSONObject"
|
||||
},
|
||||
"OpenAIResponseFormatJSONSchema": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "json_schema",
|
||||
"default": "json_schema"
|
||||
},
|
||||
"json_schema": {
|
||||
"$ref": "#/components/schemas/OpenAIJSONSchema"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type",
|
||||
"json_schema"
|
||||
],
|
||||
"title": "OpenAIResponseFormatJSONSchema"
|
||||
},
|
||||
"OpenAIResponseFormatParam": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseFormatText"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseFormatJSONSchema"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseFormatJSONObject"
|
||||
}
|
||||
],
|
||||
"discriminator": {
|
||||
"propertyName": "type",
|
||||
"mapping": {
|
||||
"text": "#/components/schemas/OpenAIResponseFormatText",
|
||||
"json_schema": "#/components/schemas/OpenAIResponseFormatJSONSchema",
|
||||
"json_object": "#/components/schemas/OpenAIResponseFormatJSONObject"
|
||||
}
|
||||
}
|
||||
},
|
||||
"OpenAIResponseFormatText": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"const": "text",
|
||||
"default": "text"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"type"
|
||||
],
|
||||
"title": "OpenAIResponseFormatText"
|
||||
},
|
||||
"OpenAISystemMessageParam": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -8930,7 +9194,17 @@
|
|||
"description": "Must be \"system\" to identify this as a system message"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/components/schemas/InterleavedContent",
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "The content of the \"system prompt\". If multiple system messages are provided, they are concatenated. The underlying Llama Stack code may also add other system messages (for example, for formatting tool definitions)."
|
||||
},
|
||||
"name": {
|
||||
|
|
@ -8960,7 +9234,17 @@
|
|||
"description": "Unique identifier for the tool call this response is for"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/components/schemas/InterleavedContent",
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "The response content from the tool"
|
||||
}
|
||||
},
|
||||
|
|
@ -8983,7 +9267,17 @@
|
|||
"description": "Must be \"user\" to identify this as a user message"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/components/schemas/InterleavedContent",
|
||||
"oneOf": [
|
||||
{
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionContentPartParam"
|
||||
}
|
||||
}
|
||||
],
|
||||
"description": "The content of the message, which can include text and other media"
|
||||
},
|
||||
"name": {
|
||||
|
|
@ -9111,10 +9405,7 @@
|
|||
"description": "(Optional) The penalty for repeated tokens"
|
||||
},
|
||||
"response_format": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
},
|
||||
"$ref": "#/components/schemas/OpenAIResponseFormatParam",
|
||||
"description": "(Optional) The response format to use"
|
||||
},
|
||||
"seed": {
|
||||
|
|
@ -9291,6 +9582,46 @@
|
|||
"title": "OpenAIChatCompletion",
|
||||
"description": "Response from an OpenAI-compatible chat completion request."
|
||||
},
|
||||
"OpenAIChatCompletionChunk": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "The ID of the chat completion"
|
||||
},
|
||||
"choices": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIChunkChoice"
|
||||
},
|
||||
"description": "List of choices"
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"const": "chat.completion.chunk",
|
||||
"default": "chat.completion.chunk",
|
||||
"description": "The object type, which will be \"chat.completion.chunk\""
|
||||
},
|
||||
"created": {
|
||||
"type": "integer",
|
||||
"description": "The Unix timestamp in seconds when the chat completion was created"
|
||||
},
|
||||
"model": {
|
||||
"type": "string",
|
||||
"description": "The model that was used to generate the chat completion"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"choices",
|
||||
"object",
|
||||
"created",
|
||||
"model"
|
||||
],
|
||||
"title": "OpenAIChatCompletionChunk",
|
||||
"description": "Chunk from a streaming response to an OpenAI-compatible chat completion request."
|
||||
},
|
||||
"OpenAIChoice": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -9303,10 +9634,12 @@
|
|||
"description": "The reason the model stopped generating"
|
||||
},
|
||||
"index": {
|
||||
"type": "integer"
|
||||
"type": "integer",
|
||||
"description": "The index of the choice"
|
||||
},
|
||||
"logprobs": {
|
||||
"$ref": "#/components/schemas/OpenAIChoiceLogprobs"
|
||||
"$ref": "#/components/schemas/OpenAIChoiceLogprobs",
|
||||
"description": "(Optional) The log probabilities for the tokens in the message"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
|
|
@ -9318,6 +9651,33 @@
|
|||
"title": "OpenAIChoice",
|
||||
"description": "A choice from an OpenAI-compatible chat completion response."
|
||||
},
|
||||
"OpenAIChoiceDelta": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"content": {
|
||||
"type": "string",
|
||||
"description": "(Optional) The content of the delta"
|
||||
},
|
||||
"refusal": {
|
||||
"type": "string",
|
||||
"description": "(Optional) The refusal of the delta"
|
||||
},
|
||||
"role": {
|
||||
"type": "string",
|
||||
"description": "(Optional) The role of the delta"
|
||||
},
|
||||
"tool_calls": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAIChatCompletionToolCall"
|
||||
},
|
||||
"description": "(Optional) The tool calls of the delta"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "OpenAIChoiceDelta",
|
||||
"description": "A delta from an OpenAI-compatible chat completion streaming response."
|
||||
},
|
||||
"OpenAIChoiceLogprobs": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -9325,19 +9685,50 @@
|
|||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAITokenLogProb"
|
||||
}
|
||||
},
|
||||
"description": "(Optional) The log probabilities for the tokens in the message"
|
||||
},
|
||||
"refusal": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/components/schemas/OpenAITokenLogProb"
|
||||
}
|
||||
},
|
||||
"description": "(Optional) The log probabilities for the tokens in the message"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "OpenAIChoiceLogprobs",
|
||||
"description": "The log probabilities for the tokens in the message from an OpenAI-compatible chat completion response."
|
||||
},
|
||||
"OpenAIChunkChoice": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"delta": {
|
||||
"$ref": "#/components/schemas/OpenAIChoiceDelta",
|
||||
"description": "The delta from the chunk"
|
||||
},
|
||||
"finish_reason": {
|
||||
"type": "string",
|
||||
"description": "The reason the model stopped generating"
|
||||
},
|
||||
"index": {
|
||||
"type": "integer",
|
||||
"description": "The index of the choice"
|
||||
},
|
||||
"logprobs": {
|
||||
"$ref": "#/components/schemas/OpenAIChoiceLogprobs",
|
||||
"description": "(Optional) The log probabilities for the tokens in the message"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"delta",
|
||||
"finish_reason",
|
||||
"index"
|
||||
],
|
||||
"title": "OpenAIChunkChoice",
|
||||
"description": "A chunk choice from an OpenAI-compatible chat completion streaming response."
|
||||
},
|
||||
"OpenAITokenLogProb": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -9778,13 +10169,16 @@
|
|||
"type": "integer"
|
||||
},
|
||||
"max_steps_per_epoch": {
|
||||
"type": "integer"
|
||||
"type": "integer",
|
||||
"default": 1
|
||||
},
|
||||
"gradient_accumulation_steps": {
|
||||
"type": "integer"
|
||||
"type": "integer",
|
||||
"default": 1
|
||||
},
|
||||
"max_validation_steps": {
|
||||
"type": "integer"
|
||||
"type": "integer",
|
||||
"default": 1
|
||||
},
|
||||
"data_config": {
|
||||
"$ref": "#/components/schemas/DataConfig"
|
||||
|
|
@ -9804,10 +10198,7 @@
|
|||
"required": [
|
||||
"n_epochs",
|
||||
"max_steps_per_epoch",
|
||||
"gradient_accumulation_steps",
|
||||
"max_validation_steps",
|
||||
"data_config",
|
||||
"optimizer_config"
|
||||
"gradient_accumulation_steps"
|
||||
],
|
||||
"title": "TrainingConfig"
|
||||
},
|
||||
|
|
@ -10983,8 +11374,7 @@
|
|||
"job_uuid",
|
||||
"training_config",
|
||||
"hyperparam_search_config",
|
||||
"logger_config",
|
||||
"model"
|
||||
"logger_config"
|
||||
],
|
||||
"title": "SupervisedFineTuneRequest"
|
||||
},
|
||||
|
|
@ -11174,7 +11564,9 @@
|
|||
"x-displayName": "Agents API for creating and interacting with agentic systems."
|
||||
},
|
||||
{
|
||||
"name": "BatchInference (Coming Soon)"
|
||||
"name": "BatchInference (Coming Soon)",
|
||||
"description": "This is an asynchronous API. If the request is successful, the response will be a job which can be polled for completion.\n\nNOTE: This API is not yet implemented and is subject to change in concert with other asynchronous APIs\nincluding (post-training, evals, etc).",
|
||||
"x-displayName": "Batch inference API for generating completions and chat completions."
|
||||
},
|
||||
{
|
||||
"name": "Benchmarks"
|
||||
|
|
|
|||
451
docs/_static/llama-stack-spec.yaml
vendored
451
docs/_static/llama-stack-spec.yaml
vendored
|
|
@ -40,7 +40,7 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/AppendRowsRequest'
|
||||
required: true
|
||||
/v1/batch-inference/chat-completion:
|
||||
/v1/inference/batch-chat-completion:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
|
|
@ -60,7 +60,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- BatchInference (Coming Soon)
|
||||
- Inference
|
||||
description: ''
|
||||
parameters: []
|
||||
requestBody:
|
||||
|
|
@ -69,7 +69,7 @@ paths:
|
|||
schema:
|
||||
$ref: '#/components/schemas/BatchChatCompletionRequest'
|
||||
required: true
|
||||
/v1/batch-inference/completion:
|
||||
/v1/inference/batch-completion:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
|
|
@ -89,7 +89,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- BatchInference (Coming Soon)
|
||||
- Inference
|
||||
description: ''
|
||||
parameters: []
|
||||
requestBody:
|
||||
|
|
@ -148,7 +148,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
- BatchInference (Coming Soon)
|
||||
description: >-
|
||||
Generate a chat completion for the given messages using the specified model.
|
||||
parameters: []
|
||||
|
|
@ -183,7 +183,7 @@ paths:
|
|||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Inference
|
||||
- BatchInference (Coming Soon)
|
||||
description: >-
|
||||
Generate a completion for the given content using the specified model.
|
||||
parameters: []
|
||||
|
|
@ -2135,11 +2135,15 @@ paths:
|
|||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: OK
|
||||
description: >-
|
||||
Response from an OpenAI-compatible chat completion request. **OR** Chunk
|
||||
from a streaming response to an OpenAI-compatible chat completion request.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/OpenAIChatCompletion'
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIChatCompletion'
|
||||
- $ref: '#/components/schemas/OpenAIChatCompletionChunk'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
|
|
@ -3009,6 +3013,54 @@ components:
|
|||
- tool_name
|
||||
- arguments
|
||||
title: ToolCall
|
||||
ToolConfig:
|
||||
type: object
|
||||
properties:
|
||||
tool_choice:
|
||||
oneOf:
|
||||
- type: string
|
||||
enum:
|
||||
- auto
|
||||
- required
|
||||
- none
|
||||
title: ToolChoice
|
||||
description: >-
|
||||
Whether tool use is required or automatic. This is a hint to the model
|
||||
which may not be followed. It depends on the Instruction Following
|
||||
capabilities of the model.
|
||||
- type: string
|
||||
default: auto
|
||||
description: >-
|
||||
(Optional) Whether tool use is automatic, required, or none. Can also
|
||||
specify a tool name to use a specific tool. Defaults to ToolChoice.auto.
|
||||
tool_prompt_format:
|
||||
type: string
|
||||
enum:
|
||||
- json
|
||||
- function_tag
|
||||
- python_list
|
||||
description: >-
|
||||
(Optional) Instructs the model how to format tool calls. By default, Llama
|
||||
Stack will attempt to use a format that is best adapted to the model.
|
||||
- `ToolPromptFormat.json`: The tool calls are formatted as a JSON object.
|
||||
- `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name>
|
||||
tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python
|
||||
syntax -- a list of function calls.
|
||||
system_message_behavior:
|
||||
type: string
|
||||
enum:
|
||||
- append
|
||||
- replace
|
||||
description: >-
|
||||
(Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`:
|
||||
Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`:
|
||||
Replaces the default system prompt with the provided system message. The
|
||||
system message can include the string '{{function_definitions}}' to indicate
|
||||
where the function definitions should be inserted.
|
||||
default: append
|
||||
additionalProperties: false
|
||||
title: ToolConfig
|
||||
description: Configuration for tool use.
|
||||
ToolDefinition:
|
||||
type: object
|
||||
properties:
|
||||
|
|
@ -3145,7 +3197,7 @@ components:
|
|||
BatchChatCompletionRequest:
|
||||
type: object
|
||||
properties:
|
||||
model:
|
||||
model_id:
|
||||
type: string
|
||||
messages_batch:
|
||||
type: array
|
||||
|
|
@ -3159,26 +3211,8 @@ components:
|
|||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ToolDefinition'
|
||||
tool_choice:
|
||||
type: string
|
||||
enum:
|
||||
- auto
|
||||
- required
|
||||
- none
|
||||
title: ToolChoice
|
||||
description: >-
|
||||
Whether tool use is required or automatic. This is a hint to the model
|
||||
which may not be followed. It depends on the Instruction Following capabilities
|
||||
of the model.
|
||||
tool_prompt_format:
|
||||
type: string
|
||||
enum:
|
||||
- json
|
||||
- function_tag
|
||||
- python_list
|
||||
title: ToolPromptFormat
|
||||
description: >-
|
||||
Prompt format for calling custom / zero shot tools.
|
||||
tool_config:
|
||||
$ref: '#/components/schemas/ToolConfig'
|
||||
response_format:
|
||||
$ref: '#/components/schemas/ResponseFormat'
|
||||
logprobs:
|
||||
|
|
@ -3193,7 +3227,7 @@ components:
|
|||
title: LogProbConfig
|
||||
additionalProperties: false
|
||||
required:
|
||||
- model
|
||||
- model_id
|
||||
- messages_batch
|
||||
title: BatchChatCompletionRequest
|
||||
BatchChatCompletionResponse:
|
||||
|
|
@ -3261,7 +3295,7 @@ components:
|
|||
BatchCompletionRequest:
|
||||
type: object
|
||||
properties:
|
||||
model:
|
||||
model_id:
|
||||
type: string
|
||||
content_batch:
|
||||
type: array
|
||||
|
|
@ -3283,7 +3317,7 @@ components:
|
|||
title: LogProbConfig
|
||||
additionalProperties: false
|
||||
required:
|
||||
- model
|
||||
- model_id
|
||||
- content_batch
|
||||
title: BatchCompletionRequest
|
||||
BatchCompletionResponse:
|
||||
|
|
@ -3335,54 +3369,6 @@ components:
|
|||
required:
|
||||
- job_uuid
|
||||
title: CancelTrainingJobRequest
|
||||
ToolConfig:
|
||||
type: object
|
||||
properties:
|
||||
tool_choice:
|
||||
oneOf:
|
||||
- type: string
|
||||
enum:
|
||||
- auto
|
||||
- required
|
||||
- none
|
||||
title: ToolChoice
|
||||
description: >-
|
||||
Whether tool use is required or automatic. This is a hint to the model
|
||||
which may not be followed. It depends on the Instruction Following
|
||||
capabilities of the model.
|
||||
- type: string
|
||||
default: auto
|
||||
description: >-
|
||||
(Optional) Whether tool use is automatic, required, or none. Can also
|
||||
specify a tool name to use a specific tool. Defaults to ToolChoice.auto.
|
||||
tool_prompt_format:
|
||||
type: string
|
||||
enum:
|
||||
- json
|
||||
- function_tag
|
||||
- python_list
|
||||
description: >-
|
||||
(Optional) Instructs the model how to format tool calls. By default, Llama
|
||||
Stack will attempt to use a format that is best adapted to the model.
|
||||
- `ToolPromptFormat.json`: The tool calls are formatted as a JSON object.
|
||||
- `ToolPromptFormat.function_tag`: The tool calls are enclosed in a <function=function_name>
|
||||
tag. - `ToolPromptFormat.python_list`: The tool calls are output as Python
|
||||
syntax -- a list of function calls.
|
||||
system_message_behavior:
|
||||
type: string
|
||||
enum:
|
||||
- append
|
||||
- replace
|
||||
description: >-
|
||||
(Optional) Config for how to override the default system prompt. - `SystemMessageBehavior.append`:
|
||||
Appends the provided system message to the default system prompt. - `SystemMessageBehavior.replace`:
|
||||
Replaces the default system prompt with the provided system message. The
|
||||
system message can include the string '{{function_definitions}}' to indicate
|
||||
where the function definitions should be inserted.
|
||||
default: append
|
||||
additionalProperties: false
|
||||
title: ToolConfig
|
||||
description: Configuration for tool use.
|
||||
ChatCompletionRequest:
|
||||
type: object
|
||||
properties:
|
||||
|
|
@ -5481,6 +5467,11 @@ components:
|
|||
properties:
|
||||
status:
|
||||
type: string
|
||||
enum:
|
||||
- OK
|
||||
- Error
|
||||
- Not Implemented
|
||||
title: HealthStatus
|
||||
additionalProperties: false
|
||||
required:
|
||||
- status
|
||||
|
|
@ -5592,12 +5583,23 @@ components:
|
|||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
health:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- api
|
||||
- provider_id
|
||||
- provider_type
|
||||
- config
|
||||
- health
|
||||
title: ProviderInfo
|
||||
InvokeToolRequest:
|
||||
type: object
|
||||
|
|
@ -6075,7 +6077,11 @@ components:
|
|||
description: >-
|
||||
Must be "assistant" to identify this as the model's response
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
|
||||
description: The content of the model's response
|
||||
name:
|
||||
type: string
|
||||
|
|
@ -6084,9 +6090,10 @@ components:
|
|||
tool_calls:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/ToolCall'
|
||||
$ref: '#/components/schemas/OpenAIChatCompletionToolCall'
|
||||
description: >-
|
||||
List of tool calls. Each tool call is a ToolCall object.
|
||||
List of tool calls. Each tool call is an OpenAIChatCompletionToolCall
|
||||
object.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- role
|
||||
|
|
@ -6095,6 +6102,70 @@ components:
|
|||
description: >-
|
||||
A message containing the model's (assistant) response in an OpenAI-compatible
|
||||
chat completion request.
|
||||
"OpenAIChatCompletionContentPartImageParam":
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: image_url
|
||||
default: image_url
|
||||
image_url:
|
||||
$ref: '#/components/schemas/OpenAIImageURL'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- image_url
|
||||
title: >-
|
||||
OpenAIChatCompletionContentPartImageParam
|
||||
OpenAIChatCompletionContentPartParam:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
|
||||
- $ref: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
text: '#/components/schemas/OpenAIChatCompletionContentPartTextParam'
|
||||
image_url: '#/components/schemas/OpenAIChatCompletionContentPartImageParam'
|
||||
OpenAIChatCompletionContentPartTextParam:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: text
|
||||
default: text
|
||||
text:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- text
|
||||
title: OpenAIChatCompletionContentPartTextParam
|
||||
OpenAIChatCompletionToolCall:
|
||||
type: object
|
||||
properties:
|
||||
index:
|
||||
type: integer
|
||||
id:
|
||||
type: string
|
||||
type:
|
||||
type: string
|
||||
const: function
|
||||
default: function
|
||||
function:
|
||||
$ref: '#/components/schemas/OpenAIChatCompletionToolCallFunction'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
title: OpenAIChatCompletionToolCall
|
||||
OpenAIChatCompletionToolCallFunction:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
arguments:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
title: OpenAIChatCompletionToolCallFunction
|
||||
OpenAIDeveloperMessageParam:
|
||||
type: object
|
||||
properties:
|
||||
|
|
@ -6105,7 +6176,11 @@ components:
|
|||
description: >-
|
||||
Must be "developer" to identify this as a developer message
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
|
||||
description: The content of the developer message
|
||||
name:
|
||||
type: string
|
||||
|
|
@ -6118,6 +6193,40 @@ components:
|
|||
title: OpenAIDeveloperMessageParam
|
||||
description: >-
|
||||
A message from the developer in an OpenAI-compatible chat completion request.
|
||||
OpenAIImageURL:
|
||||
type: object
|
||||
properties:
|
||||
url:
|
||||
type: string
|
||||
detail:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
required:
|
||||
- url
|
||||
title: OpenAIImageURL
|
||||
OpenAIJSONSchema:
|
||||
type: object
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
description:
|
||||
type: string
|
||||
strict:
|
||||
type: boolean
|
||||
schema:
|
||||
type: object
|
||||
additionalProperties:
|
||||
oneOf:
|
||||
- type: 'null'
|
||||
- type: boolean
|
||||
- type: number
|
||||
- type: string
|
||||
- type: array
|
||||
- type: object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- name
|
||||
title: OpenAIJSONSchema
|
||||
OpenAIMessageParam:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIUserMessageParam'
|
||||
|
|
@ -6133,6 +6242,53 @@ components:
|
|||
assistant: '#/components/schemas/OpenAIAssistantMessageParam'
|
||||
tool: '#/components/schemas/OpenAIToolMessageParam'
|
||||
developer: '#/components/schemas/OpenAIDeveloperMessageParam'
|
||||
OpenAIResponseFormatJSONObject:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: json_object
|
||||
default: json_object
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
title: OpenAIResponseFormatJSONObject
|
||||
OpenAIResponseFormatJSONSchema:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: json_schema
|
||||
default: json_schema
|
||||
json_schema:
|
||||
$ref: '#/components/schemas/OpenAIJSONSchema'
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
- json_schema
|
||||
title: OpenAIResponseFormatJSONSchema
|
||||
OpenAIResponseFormatParam:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIResponseFormatText'
|
||||
- $ref: '#/components/schemas/OpenAIResponseFormatJSONSchema'
|
||||
- $ref: '#/components/schemas/OpenAIResponseFormatJSONObject'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
text: '#/components/schemas/OpenAIResponseFormatText'
|
||||
json_schema: '#/components/schemas/OpenAIResponseFormatJSONSchema'
|
||||
json_object: '#/components/schemas/OpenAIResponseFormatJSONObject'
|
||||
OpenAIResponseFormatText:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: text
|
||||
default: text
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
title: OpenAIResponseFormatText
|
||||
OpenAISystemMessageParam:
|
||||
type: object
|
||||
properties:
|
||||
|
|
@ -6143,7 +6299,11 @@ components:
|
|||
description: >-
|
||||
Must be "system" to identify this as a system message
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
|
||||
description: >-
|
||||
The content of the "system prompt". If multiple system messages are provided,
|
||||
they are concatenated. The underlying Llama Stack code may also add other
|
||||
|
|
@ -6173,7 +6333,11 @@ components:
|
|||
description: >-
|
||||
Unique identifier for the tool call this response is for
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
|
||||
description: The response content from the tool
|
||||
additionalProperties: false
|
||||
required:
|
||||
|
|
@ -6194,7 +6358,11 @@ components:
|
|||
description: >-
|
||||
Must be "user" to identify this as a user message
|
||||
content:
|
||||
$ref: '#/components/schemas/InterleavedContent'
|
||||
oneOf:
|
||||
- type: string
|
||||
- type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIChatCompletionContentPartParam'
|
||||
description: >-
|
||||
The content of the message, which can include text and other media
|
||||
name:
|
||||
|
|
@ -6280,9 +6448,7 @@ components:
|
|||
description: >-
|
||||
(Optional) The penalty for repeated tokens
|
||||
response_format:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: string
|
||||
$ref: '#/components/schemas/OpenAIResponseFormatParam'
|
||||
description: (Optional) The response format to use
|
||||
seed:
|
||||
type: integer
|
||||
|
|
@ -6388,6 +6554,41 @@ components:
|
|||
title: OpenAIChatCompletion
|
||||
description: >-
|
||||
Response from an OpenAI-compatible chat completion request.
|
||||
OpenAIChatCompletionChunk:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: The ID of the chat completion
|
||||
choices:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIChunkChoice'
|
||||
description: List of choices
|
||||
object:
|
||||
type: string
|
||||
const: chat.completion.chunk
|
||||
default: chat.completion.chunk
|
||||
description: >-
|
||||
The object type, which will be "chat.completion.chunk"
|
||||
created:
|
||||
type: integer
|
||||
description: >-
|
||||
The Unix timestamp in seconds when the chat completion was created
|
||||
model:
|
||||
type: string
|
||||
description: >-
|
||||
The model that was used to generate the chat completion
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- choices
|
||||
- object
|
||||
- created
|
||||
- model
|
||||
title: OpenAIChatCompletionChunk
|
||||
description: >-
|
||||
Chunk from a streaming response to an OpenAI-compatible chat completion request.
|
||||
OpenAIChoice:
|
||||
type: object
|
||||
properties:
|
||||
|
|
@ -6399,8 +6600,11 @@ components:
|
|||
description: The reason the model stopped generating
|
||||
index:
|
||||
type: integer
|
||||
description: The index of the choice
|
||||
logprobs:
|
||||
$ref: '#/components/schemas/OpenAIChoiceLogprobs'
|
||||
description: >-
|
||||
(Optional) The log probabilities for the tokens in the message
|
||||
additionalProperties: false
|
||||
required:
|
||||
- message
|
||||
|
|
@ -6409,6 +6613,27 @@ components:
|
|||
title: OpenAIChoice
|
||||
description: >-
|
||||
A choice from an OpenAI-compatible chat completion response.
|
||||
OpenAIChoiceDelta:
|
||||
type: object
|
||||
properties:
|
||||
content:
|
||||
type: string
|
||||
description: (Optional) The content of the delta
|
||||
refusal:
|
||||
type: string
|
||||
description: (Optional) The refusal of the delta
|
||||
role:
|
||||
type: string
|
||||
description: (Optional) The role of the delta
|
||||
tool_calls:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAIChatCompletionToolCall'
|
||||
description: (Optional) The tool calls of the delta
|
||||
additionalProperties: false
|
||||
title: OpenAIChoiceDelta
|
||||
description: >-
|
||||
A delta from an OpenAI-compatible chat completion streaming response.
|
||||
OpenAIChoiceLogprobs:
|
||||
type: object
|
||||
properties:
|
||||
|
|
@ -6416,15 +6641,43 @@ components:
|
|||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAITokenLogProb'
|
||||
description: >-
|
||||
(Optional) The log probabilities for the tokens in the message
|
||||
refusal:
|
||||
type: array
|
||||
items:
|
||||
$ref: '#/components/schemas/OpenAITokenLogProb'
|
||||
description: >-
|
||||
(Optional) The log probabilities for the tokens in the refusal
|
||||
additionalProperties: false
|
||||
title: OpenAIChoiceLogprobs
|
||||
description: >-
|
||||
The log probabilities for the tokens in the message from an OpenAI-compatible
|
||||
chat completion response.
|
||||
OpenAIChunkChoice:
|
||||
type: object
|
||||
properties:
|
||||
delta:
|
||||
$ref: '#/components/schemas/OpenAIChoiceDelta'
|
||||
description: The delta from the chunk
|
||||
finish_reason:
|
||||
type: string
|
||||
description: The reason the model stopped generating
|
||||
index:
|
||||
type: integer
|
||||
description: The index of the choice
|
||||
logprobs:
|
||||
$ref: '#/components/schemas/OpenAIChoiceLogprobs'
|
||||
description: >-
|
||||
(Optional) The log probabilities for the tokens in the message
|
||||
additionalProperties: false
|
||||
required:
|
||||
- delta
|
||||
- finish_reason
|
||||
- index
|
||||
title: OpenAIChunkChoice
|
||||
description: >-
|
||||
A chunk choice from an OpenAI-compatible chat completion streaming response.
|
||||
OpenAITokenLogProb:
|
||||
type: object
|
||||
properties:
|
||||
|
|
@ -6744,10 +6997,13 @@ components:
|
|||
type: integer
|
||||
max_steps_per_epoch:
|
||||
type: integer
|
||||
default: 1
|
||||
gradient_accumulation_steps:
|
||||
type: integer
|
||||
default: 1
|
||||
max_validation_steps:
|
||||
type: integer
|
||||
default: 1
|
||||
data_config:
|
||||
$ref: '#/components/schemas/DataConfig'
|
||||
optimizer_config:
|
||||
|
|
@ -6762,9 +7018,6 @@ components:
|
|||
- n_epochs
|
||||
- max_steps_per_epoch
|
||||
- gradient_accumulation_steps
|
||||
- max_validation_steps
|
||||
- data_config
|
||||
- optimizer_config
|
||||
title: TrainingConfig
|
||||
PreferenceOptimizeRequest:
|
||||
type: object
|
||||
|
|
@ -7498,7 +7751,6 @@ components:
|
|||
- training_config
|
||||
- hyperparam_search_config
|
||||
- logger_config
|
||||
- model
|
||||
title: SupervisedFineTuneRequest
|
||||
SyntheticDataGenerateRequest:
|
||||
type: object
|
||||
|
|
@ -7633,6 +7885,17 @@ tags:
|
|||
x-displayName: >-
|
||||
Agents API for creating and interacting with agentic systems.
|
||||
- name: BatchInference (Coming Soon)
|
||||
description: >-
|
||||
This is an asynchronous API. If the request is successful, the response will
|
||||
be a job which can be polled for completion.
|
||||
|
||||
|
||||
NOTE: This API is not yet implemented and is subject to change in concert with
|
||||
other asynchronous APIs
|
||||
|
||||
including (post-training, evals, etc).
|
||||
x-displayName: >-
|
||||
Batch inference API for generating completions and chat completions.
|
||||
- name: Benchmarks
|
||||
- name: DatasetIO
|
||||
- name: Datasets
|
||||
|
|
|
|||
|
|
@ -231,7 +231,7 @@ options:
|
|||
-h, --help show this help message and exit
|
||||
--port PORT Port to run the server on. It can also be passed via the env var LLAMA_STACK_PORT. (default: 8321)
|
||||
--image-name IMAGE_NAME
|
||||
Name of the image to run. Defaults to the current conda environment (default: None)
|
||||
Name of the image to run. Defaults to the current environment (default: None)
|
||||
--disable-ipv6 Disable IPv6 support (default: False)
|
||||
--env KEY=VALUE Environment variables to pass to the server in KEY=VALUE format. Can be specified multiple times. (default: [])
|
||||
--tls-keyfile TLS_KEYFILE
|
||||
|
|
|
|||
88
docs/source/distributions/remote_hosted_distro/nvidia.md
Normal file
88
docs/source/distributions/remote_hosted_distro/nvidia.md
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
<!-- This file was auto-generated by distro_codegen.py, please edit source -->
|
||||
# NVIDIA Distribution
|
||||
|
||||
The `llamastack/distribution-nvidia` distribution consists of the following provider configurations.
|
||||
|
||||
| API | Provider(s) |
|
||||
|-----|-------------|
|
||||
| agents | `inline::meta-reference` |
|
||||
| datasetio | `inline::localfs` |
|
||||
| eval | `inline::meta-reference` |
|
||||
| inference | `remote::nvidia` |
|
||||
| post_training | `remote::nvidia` |
|
||||
| safety | `remote::nvidia` |
|
||||
| scoring | `inline::basic` |
|
||||
| telemetry | `inline::meta-reference` |
|
||||
| tool_runtime | `inline::rag-runtime` |
|
||||
| vector_io | `inline::faiss` |
|
||||
|
||||
|
||||
### Environment Variables
|
||||
|
||||
The following environment variables can be configured:
|
||||
|
||||
- `NVIDIA_API_KEY`: NVIDIA API Key (default: ``)
|
||||
- `NVIDIA_USER_ID`: NVIDIA User ID (default: `llama-stack-user`)
|
||||
- `NVIDIA_DATASET_NAMESPACE`: NVIDIA Dataset Namespace (default: `default`)
|
||||
- `NVIDIA_ACCESS_POLICIES`: NVIDIA Access Policies (default: `{}`)
|
||||
- `NVIDIA_PROJECT_ID`: NVIDIA Project ID (default: `test-project`)
|
||||
- `NVIDIA_CUSTOMIZER_URL`: NVIDIA Customizer URL (default: `https://customizer.api.nvidia.com`)
|
||||
- `NVIDIA_OUTPUT_MODEL_DIR`: NVIDIA Output Model Directory (default: `test-example-model@v1`)
|
||||
- `GUARDRAILS_SERVICE_URL`: URL for the NeMo Guardrails Service (default: `http://0.0.0.0:7331`)
|
||||
- `INFERENCE_MODEL`: Inference model (default: `Llama3.1-8B-Instruct`)
|
||||
- `SAFETY_MODEL`: Name of the model to use for safety (default: `meta/llama-3.1-8b-instruct`)
|
||||
|
||||
### Models
|
||||
|
||||
The following models are available by default:
|
||||
|
||||
- `meta/llama3-8b-instruct (aliases: meta-llama/Llama-3-8B-Instruct)`
|
||||
- `meta/llama3-70b-instruct (aliases: meta-llama/Llama-3-70B-Instruct)`
|
||||
- `meta/llama-3.1-8b-instruct (aliases: meta-llama/Llama-3.1-8B-Instruct)`
|
||||
- `meta/llama-3.1-70b-instruct (aliases: meta-llama/Llama-3.1-70B-Instruct)`
|
||||
- `meta/llama-3.1-405b-instruct (aliases: meta-llama/Llama-3.1-405B-Instruct-FP8)`
|
||||
- `meta/llama-3.2-1b-instruct (aliases: meta-llama/Llama-3.2-1B-Instruct)`
|
||||
- `meta/llama-3.2-3b-instruct (aliases: meta-llama/Llama-3.2-3B-Instruct)`
|
||||
- `meta/llama-3.2-11b-vision-instruct (aliases: meta-llama/Llama-3.2-11B-Vision-Instruct)`
|
||||
- `meta/llama-3.2-90b-vision-instruct (aliases: meta-llama/Llama-3.2-90B-Vision-Instruct)`
|
||||
- `nvidia/llama-3.2-nv-embedqa-1b-v2 `
|
||||
- `nvidia/nv-embedqa-e5-v5 `
|
||||
- `nvidia/nv-embedqa-mistral-7b-v2 `
|
||||
- `snowflake/arctic-embed-l `
|
||||
|
||||
|
||||
### Prerequisite: API Keys
|
||||
|
||||
Make sure you have access to an NVIDIA API Key. You can get one by visiting [https://build.nvidia.com/](https://build.nvidia.com/).
|
||||
|
||||
|
||||
## Running Llama Stack with NVIDIA
|
||||
|
||||
You can do this via Conda (build code) or Docker, which has a pre-built image.
|
||||
|
||||
### Via Docker
|
||||
|
||||
This method allows you to get started quickly without having to build the distribution code.
|
||||
|
||||
```bash
|
||||
LLAMA_STACK_PORT=8321
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
-v ./run.yaml:/root/my-run.yaml \
|
||||
llamastack/distribution-nvidia \
|
||||
--yaml-config /root/my-run.yaml \
|
||||
--port $LLAMA_STACK_PORT \
|
||||
--env NVIDIA_API_KEY=$NVIDIA_API_KEY
|
||||
```
|
||||
|
||||
### Via Conda
|
||||
|
||||
```bash
|
||||
llama stack build --template nvidia --image-type conda
|
||||
llama stack run ./run.yaml \
|
||||
--port 8321 \
|
||||
--env NVIDIA_API_KEY=$NVIDIA_API_KEY \
|
||||
--env INFERENCE_MODEL=$INFERENCE_MODEL
|
||||
```
|
||||
|
|
@ -43,7 +43,9 @@ The following models are available by default:
|
|||
- `groq/llama-3.3-70b-versatile (aliases: meta-llama/Llama-3.3-70B-Instruct)`
|
||||
- `groq/llama-3.2-3b-preview (aliases: meta-llama/Llama-3.2-3B-Instruct)`
|
||||
- `groq/llama-4-scout-17b-16e-instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)`
|
||||
- `groq/meta-llama/llama-4-scout-17b-16e-instruct (aliases: meta-llama/Llama-4-Scout-17B-16E-Instruct)`
|
||||
- `groq/llama-4-maverick-17b-128e-instruct (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)`
|
||||
- `groq/meta-llama/llama-4-maverick-17b-128e-instruct (aliases: meta-llama/Llama-4-Maverick-17B-128E-Instruct)`
|
||||
|
||||
|
||||
### Prerequisite: API Keys
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue