Merge 405d0e8001 into sapling-pr-archive-ehhuang

ehhuang 2025-10-22 14:19:44 -07:00 committed by GitHub
commit 4e13e6f272
21 changed files with 377 additions and 566 deletions


@@ -350,146 +350,46 @@ paths:
           in: query
           description: >-
             An item ID to list items after, used in pagination.
-          required: true
+          required: false
           schema:
-            oneOf:
-              - type: string
-              - type: object
-                title: NotGiven
-                description: >-
-                  A sentinel singleton class used to distinguish omitted keyword arguments
-                  from those passed in with the value None (which may have different
-                  behavior).
-                  For example:
-                  ```py
-                  def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response:
-                  ...
-                  get(timeout=1) # 1s timeout
-                  get(timeout=None) # No timeout
-                  get() # Default timeout behavior, which may not be statically known
-                  at the method definition.
-                  ```
+            type: string
         - name: include
           in: query
           description: >-
             Specify additional output data to include in the response.
-          required: true
+          required: false
           schema:
-            oneOf:
-              - type: array
-                items:
-                  type: string
-                  enum:
-                    - code_interpreter_call.outputs
-                    - computer_call_output.output.image_url
-                    - file_search_call.results
-                    - message.input_image.image_url
-                    - message.output_text.logprobs
-                    - reasoning.encrypted_content
-              - type: object
-                title: NotGiven
-                description: >-
-                  A sentinel singleton class used to distinguish omitted keyword arguments
-                  from those passed in with the value None (which may have different
-                  behavior).
-                  For example:
-                  ```py
-                  def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response:
-                  ...
-                  get(timeout=1) # 1s timeout
-                  get(timeout=None) # No timeout
-                  get() # Default timeout behavior, which may not be statically known
-                  at the method definition.
-                  ```
+            type: array
+            items:
+              type: string
+              enum:
+                - web_search_call.action.sources
+                - code_interpreter_call.outputs
+                - computer_call_output.output.image_url
+                - file_search_call.results
+                - message.input_image.image_url
+                - message.output_text.logprobs
+                - reasoning.encrypted_content
+              title: ConversationItemInclude
+              description: >-
+                Specify additional output data to include in the model response.
         - name: limit
           in: query
           description: >-
             A limit on the number of objects to be returned (1-100, default 20).
-          required: true
+          required: false
           schema:
-            oneOf:
-              - type: integer
-              - type: object
-                title: NotGiven
-                description: >-
-                  A sentinel singleton class used to distinguish omitted keyword arguments
-                  from those passed in with the value None (which may have different
-                  behavior).
-                  For example:
-                  ```py
-                  def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response:
-                  ...
-                  get(timeout=1) # 1s timeout
-                  get(timeout=None) # No timeout
-                  get() # Default timeout behavior, which may not be statically known
-                  at the method definition.
-                  ```
+            type: integer
         - name: order
           in: query
           description: >-
             The order to return items in (asc or desc, default desc).
-          required: true
+          required: false
           schema:
-            oneOf:
-              - type: string
-                enum:
-                  - asc
-                  - desc
-              - type: object
-                title: NotGiven
-                description: >-
-                  A sentinel singleton class used to distinguish omitted keyword arguments
-                  from those passed in with the value None (which may have different
-                  behavior).
-                  For example:
-                  ```py
-                  def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response:
-                  ...
-                  get(timeout=1) # 1s timeout
-                  get(timeout=None) # No timeout
-                  get() # Default timeout behavior, which may not be statically known
-                  at the method definition.
-                  ```
+            type: string
+            enum:
+              - asc
+              - desc
       deprecated: false
     post:
       responses:
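
The four query parameters above are now optional in the spec, matching the `None` defaults on the Python side. A minimal request sketch, assuming a locally served stack at `http://localhost:8321` and the `/v1/conversations/{conversation_id}/items` route implied by the API definitions later in this diff; the conversation ID is a placeholder.

```python
import requests

base_url = "http://localhost:8321"   # assumed local llama-stack endpoint
conversation_id = "conv_123"         # placeholder conversation ID

# All query parameters are optional now; anything omitted falls back to server defaults.
resp = requests.get(
    f"{base_url}/v1/conversations/{conversation_id}/items",
    params={
        "limit": 20,
        "order": "desc",
        "include": ["message.output_text.logprobs"],
    },
    timeout=30,
)
resp.raise_for_status()
print(resp.json()["data"])
```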
@@ -6482,6 +6382,7 @@ components:
           enum:
             - llm
             - embedding
+            - rerank
           title: ModelType
           description: >-
             Enumeration of supported model types in Llama Stack.
@@ -13585,13 +13486,16 @@ tags:
       embeddings.
-      This API provides the raw interface to the underlying models. Two kinds of models
-      are supported:
+      This API provides the raw interface to the underlying models. Three kinds of
+      models are supported:
       - LLM models: these models generate "raw" and "chat" (conversational) completions.
       - Embedding models: these models generate embeddings to be used for semantic
      search.
+      - Rerank models: these models reorder the documents based on their relevance
+      to a query.
     x-displayName: Inference
   - name: Inspect
     description: >-


@@ -3,9 +3,10 @@ description: "Inference
 Llama Stack Inference API for generating completions, chat completions, and embeddings.
-This API provides the raw interface to the underlying models. Two kinds of models are supported:
+This API provides the raw interface to the underlying models. Three kinds of models are supported:
 - LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.
-- Embedding models: these models generate embeddings to be used for semantic search."
+- Embedding models: these models generate embeddings to be used for semantic search.
+- Rerank models: these models reorder the documents based on their relevance to a query."
 sidebar_label: Inference
 title: Inference
 ---
@@ -18,8 +19,9 @@ Inference
 Llama Stack Inference API for generating completions, chat completions, and embeddings.
-This API provides the raw interface to the underlying models. Two kinds of models are supported:
+This API provides the raw interface to the underlying models. Three kinds of models are supported:
 - LLM models: these models generate "raw" and "chat" (conversational) completions.
 - Embedding models: these models generate embeddings to be used for semantic search.
+- Rerank models: these models reorder the documents based on their relevance to a query.
 This section contains documentation for all available providers for the **inference** API.


@@ -13467,7 +13467,7 @@
     },
     {
       "name": "Inference",
-      "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
+      "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.",
       "x-displayName": "Inference"
     },
     {


@@ -10218,13 +10218,16 @@ tags:
       embeddings.
-      This API provides the raw interface to the underlying models. Two kinds of models
-      are supported:
+      This API provides the raw interface to the underlying models. Three kinds of
+      models are supported:
       - LLM models: these models generate "raw" and "chat" (conversational) completions.
       - Embedding models: these models generate embeddings to be used for semantic
      search.
+      - Rerank models: these models reorder the documents based on their relevance
+      to a query.
     x-displayName: Inference
   - name: Models
     description: ''


@@ -483,87 +483,54 @@
           "name": "after",
           "in": "query",
           "description": "An item ID to list items after, used in pagination.",
-          "required": true,
+          "required": false,
           "schema": {
-            "oneOf": [
-              {
-                "type": "string"
-              },
-              {
-                "type": "object",
-                "title": "NotGiven",
-                "description": "A sentinel singleton class used to distinguish omitted keyword arguments from those passed in with the value None (which may have different behavior).\nFor example:\n\n```py\ndef get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: ...\n\n\nget(timeout=1) # 1s timeout\nget(timeout=None) # No timeout\nget() # Default timeout behavior, which may not be statically known at the method definition.\n```"
-              }
-            ]
+            "type": "string"
           }
         },
         {
           "name": "include",
           "in": "query",
           "description": "Specify additional output data to include in the response.",
-          "required": true,
+          "required": false,
           "schema": {
-            "oneOf": [
-              {
-                "type": "array",
-                "items": {
-                  "type": "string",
-                  "enum": [
-                    "code_interpreter_call.outputs",
-                    "computer_call_output.output.image_url",
-                    "file_search_call.results",
-                    "message.input_image.image_url",
-                    "message.output_text.logprobs",
-                    "reasoning.encrypted_content"
-                  ]
-                }
-              },
-              {
-                "type": "object",
-                "title": "NotGiven",
-                "description": "A sentinel singleton class used to distinguish omitted keyword arguments from those passed in with the value None (which may have different behavior).\nFor example:\n\n```py\ndef get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: ...\n\n\nget(timeout=1) # 1s timeout\nget(timeout=None) # No timeout\nget() # Default timeout behavior, which may not be statically known at the method definition.\n```"
-              }
-            ]
+            "type": "array",
+            "items": {
+              "type": "string",
+              "enum": [
+                "web_search_call.action.sources",
+                "code_interpreter_call.outputs",
+                "computer_call_output.output.image_url",
+                "file_search_call.results",
+                "message.input_image.image_url",
+                "message.output_text.logprobs",
+                "reasoning.encrypted_content"
+              ],
+              "title": "ConversationItemInclude",
+              "description": "Specify additional output data to include in the model response."
+            }
           }
         },
         {
           "name": "limit",
           "in": "query",
           "description": "A limit on the number of objects to be returned (1-100, default 20).",
-          "required": true,
+          "required": false,
           "schema": {
-            "oneOf": [
-              {
-                "type": "integer"
-              },
-              {
-                "type": "object",
-                "title": "NotGiven",
-                "description": "A sentinel singleton class used to distinguish omitted keyword arguments from those passed in with the value None (which may have different behavior).\nFor example:\n\n```py\ndef get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: ...\n\n\nget(timeout=1) # 1s timeout\nget(timeout=None) # No timeout\nget() # Default timeout behavior, which may not be statically known at the method definition.\n```"
-              }
-            ]
+            "type": "integer"
           }
         },
         {
           "name": "order",
           "in": "query",
           "description": "The order to return items in (asc or desc, default desc).",
-          "required": true,
+          "required": false,
           "schema": {
-            "oneOf": [
-              {
-                "type": "string",
-                "enum": [
-                  "asc",
-                  "desc"
-                ]
-              },
-              {
-                "type": "object",
-                "title": "NotGiven",
-                "description": "A sentinel singleton class used to distinguish omitted keyword arguments from those passed in with the value None (which may have different behavior).\nFor example:\n\n```py\ndef get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: ...\n\n\nget(timeout=1) # 1s timeout\nget(timeout=None) # No timeout\nget() # Default timeout behavior, which may not be statically known at the method definition.\n```"
-              }
-            ]
+            "type": "string",
+            "enum": [
+              "asc",
+              "desc"
+            ]
           }
         }
       ],
@@ -6859,7 +6826,8 @@
           "type": "string",
           "enum": [
             "llm",
-            "embedding"
+            "embedding",
+            "rerank"
           ],
           "title": "ModelType",
           "description": "Enumeration of supported model types in Llama Stack."
@@ -13269,7 +13237,7 @@
     },
     {
       "name": "Inference",
-      "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
+      "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.",
       "x-displayName": "Inference"
     },
     {


@@ -347,146 +347,46 @@ paths:
           in: query
           description: >-
             An item ID to list items after, used in pagination.
-          required: true
+          required: false
           schema:
-            oneOf:
-              - type: string
-              - type: object
-                title: NotGiven
-                description: >-
-                  A sentinel singleton class used to distinguish omitted keyword arguments
-                  from those passed in with the value None (which may have different
-                  behavior).
-                  For example:
-                  ```py
-                  def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response:
-                  ...
-                  get(timeout=1) # 1s timeout
-                  get(timeout=None) # No timeout
-                  get() # Default timeout behavior, which may not be statically known
-                  at the method definition.
-                  ```
+            type: string
         - name: include
           in: query
           description: >-
             Specify additional output data to include in the response.
-          required: true
+          required: false
           schema:
-            oneOf:
-              - type: array
-                items:
-                  type: string
-                  enum:
-                    - code_interpreter_call.outputs
-                    - computer_call_output.output.image_url
-                    - file_search_call.results
-                    - message.input_image.image_url
-                    - message.output_text.logprobs
-                    - reasoning.encrypted_content
-              - type: object
-                title: NotGiven
-                description: >-
-                  A sentinel singleton class used to distinguish omitted keyword arguments
-                  from those passed in with the value None (which may have different
-                  behavior).
-                  For example:
-                  ```py
-                  def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response:
-                  ...
-                  get(timeout=1) # 1s timeout
-                  get(timeout=None) # No timeout
-                  get() # Default timeout behavior, which may not be statically known
-                  at the method definition.
-                  ```
+            type: array
+            items:
+              type: string
+              enum:
+                - web_search_call.action.sources
+                - code_interpreter_call.outputs
+                - computer_call_output.output.image_url
+                - file_search_call.results
+                - message.input_image.image_url
+                - message.output_text.logprobs
+                - reasoning.encrypted_content
+              title: ConversationItemInclude
+              description: >-
+                Specify additional output data to include in the model response.
         - name: limit
           in: query
           description: >-
             A limit on the number of objects to be returned (1-100, default 20).
-          required: true
+          required: false
           schema:
-            oneOf:
-              - type: integer
-              - type: object
-                title: NotGiven
-                description: >-
-                  A sentinel singleton class used to distinguish omitted keyword arguments
-                  from those passed in with the value None (which may have different
-                  behavior).
-                  For example:
-                  ```py
-                  def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response:
-                  ...
-                  get(timeout=1) # 1s timeout
-                  get(timeout=None) # No timeout
-                  get() # Default timeout behavior, which may not be statically known
-                  at the method definition.
-                  ```
+            type: integer
         - name: order
           in: query
           description: >-
             The order to return items in (asc or desc, default desc).
-          required: true
+          required: false
           schema:
-            oneOf:
-              - type: string
-                enum:
-                  - asc
-                  - desc
-              - type: object
-                title: NotGiven
-                description: >-
-                  A sentinel singleton class used to distinguish omitted keyword arguments
-                  from those passed in with the value None (which may have different
-                  behavior).
-                  For example:
-                  ```py
-                  def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response:
-                  ...
-                  get(timeout=1) # 1s timeout
-                  get(timeout=None) # No timeout
-                  get() # Default timeout behavior, which may not be statically known
-                  at the method definition.
-                  ```
+            type: string
+            enum:
+              - asc
+              - desc
       deprecated: false
     post:
       responses:
@@ -5269,6 +5169,7 @@ components:
           enum:
             - llm
            - embedding
+            - rerank
           title: ModelType
           description: >-
             Enumeration of supported model types in Llama Stack.
@@ -10190,13 +10091,16 @@ tags:
       embeddings.
-      This API provides the raw interface to the underlying models. Two kinds of models
-      are supported:
+      This API provides the raw interface to the underlying models. Three kinds of
+      models are supported:
       - LLM models: these models generate "raw" and "chat" (conversational) completions.
       - Embedding models: these models generate embeddings to be used for semantic
      search.
+      - Rerank models: these models reorder the documents based on their relevance
+      to a query.
     x-displayName: Inference
   - name: Inspect
     description: >-


@@ -483,87 +483,54 @@
           "name": "after",
           "in": "query",
           "description": "An item ID to list items after, used in pagination.",
-          "required": true,
+          "required": false,
           "schema": {
-            "oneOf": [
-              {
-                "type": "string"
-              },
-              {
-                "type": "object",
-                "title": "NotGiven",
-                "description": "A sentinel singleton class used to distinguish omitted keyword arguments from those passed in with the value None (which may have different behavior).\nFor example:\n\n```py\ndef get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: ...\n\n\nget(timeout=1) # 1s timeout\nget(timeout=None) # No timeout\nget() # Default timeout behavior, which may not be statically known at the method definition.\n```"
-              }
-            ]
+            "type": "string"
           }
         },
         {
           "name": "include",
           "in": "query",
           "description": "Specify additional output data to include in the response.",
-          "required": true,
+          "required": false,
           "schema": {
-            "oneOf": [
-              {
-                "type": "array",
-                "items": {
-                  "type": "string",
-                  "enum": [
-                    "code_interpreter_call.outputs",
-                    "computer_call_output.output.image_url",
-                    "file_search_call.results",
-                    "message.input_image.image_url",
-                    "message.output_text.logprobs",
-                    "reasoning.encrypted_content"
-                  ]
-                }
-              },
-              {
-                "type": "object",
-                "title": "NotGiven",
-                "description": "A sentinel singleton class used to distinguish omitted keyword arguments from those passed in with the value None (which may have different behavior).\nFor example:\n\n```py\ndef get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: ...\n\n\nget(timeout=1) # 1s timeout\nget(timeout=None) # No timeout\nget() # Default timeout behavior, which may not be statically known at the method definition.\n```"
-              }
-            ]
+            "type": "array",
+            "items": {
+              "type": "string",
+              "enum": [
+                "web_search_call.action.sources",
+                "code_interpreter_call.outputs",
+                "computer_call_output.output.image_url",
+                "file_search_call.results",
+                "message.input_image.image_url",
+                "message.output_text.logprobs",
+                "reasoning.encrypted_content"
+              ],
+              "title": "ConversationItemInclude",
+              "description": "Specify additional output data to include in the model response."
+            }
           }
         },
         {
           "name": "limit",
           "in": "query",
           "description": "A limit on the number of objects to be returned (1-100, default 20).",
-          "required": true,
+          "required": false,
           "schema": {
-            "oneOf": [
-              {
-                "type": "integer"
-              },
-              {
-                "type": "object",
-                "title": "NotGiven",
-                "description": "A sentinel singleton class used to distinguish omitted keyword arguments from those passed in with the value None (which may have different behavior).\nFor example:\n\n```py\ndef get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: ...\n\n\nget(timeout=1) # 1s timeout\nget(timeout=None) # No timeout\nget() # Default timeout behavior, which may not be statically known at the method definition.\n```"
-              }
-            ]
+            "type": "integer"
          }
         },
         {
           "name": "order",
           "in": "query",
           "description": "The order to return items in (asc or desc, default desc).",
-          "required": true,
+          "required": false,
           "schema": {
-            "oneOf": [
-              {
-                "type": "string",
-                "enum": [
-                  "asc",
-                  "desc"
-                ]
-              },
-              {
-                "type": "object",
-                "title": "NotGiven",
-                "description": "A sentinel singleton class used to distinguish omitted keyword arguments from those passed in with the value None (which may have different behavior).\nFor example:\n\n```py\ndef get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response: ...\n\n\nget(timeout=1) # 1s timeout\nget(timeout=None) # No timeout\nget() # Default timeout behavior, which may not be statically known at the method definition.\n```"
-              }
-            ]
+            "type": "string",
+            "enum": [
+              "asc",
+              "desc"
+            ]
           }
         }
       ],
@@ -8531,7 +8498,8 @@
           "type": "string",
           "enum": [
             "llm",
-            "embedding"
+            "embedding",
+            "rerank"
           ],
           "title": "ModelType",
           "description": "Enumeration of supported model types in Llama Stack."
@@ -17959,7 +17927,7 @@
     },
     {
       "name": "Inference",
-      "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Two kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.",
+      "description": "Llama Stack Inference API for generating completions, chat completions, and embeddings.\n\nThis API provides the raw interface to the underlying models. Three kinds of models are supported:\n- LLM models: these models generate \"raw\" and \"chat\" (conversational) completions.\n- Embedding models: these models generate embeddings to be used for semantic search.\n- Rerank models: these models reorder the documents based on their relevance to a query.",
       "x-displayName": "Inference"
     },
     {


@@ -350,146 +350,46 @@ paths:
           in: query
           description: >-
             An item ID to list items after, used in pagination.
-          required: true
+          required: false
           schema:
-            oneOf:
-              - type: string
-              - type: object
-                title: NotGiven
-                description: >-
-                  A sentinel singleton class used to distinguish omitted keyword arguments
-                  from those passed in with the value None (which may have different
-                  behavior).
-                  For example:
-                  ```py
-                  def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response:
-                  ...
-                  get(timeout=1) # 1s timeout
-                  get(timeout=None) # No timeout
-                  get() # Default timeout behavior, which may not be statically known
-                  at the method definition.
-                  ```
+            type: string
         - name: include
           in: query
           description: >-
             Specify additional output data to include in the response.
-          required: true
+          required: false
           schema:
-            oneOf:
-              - type: array
-                items:
-                  type: string
-                  enum:
-                    - code_interpreter_call.outputs
-                    - computer_call_output.output.image_url
-                    - file_search_call.results
-                    - message.input_image.image_url
-                    - message.output_text.logprobs
-                    - reasoning.encrypted_content
-              - type: object
-                title: NotGiven
-                description: >-
-                  A sentinel singleton class used to distinguish omitted keyword arguments
-                  from those passed in with the value None (which may have different
-                  behavior).
-                  For example:
-                  ```py
-                  def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response:
-                  ...
-                  get(timeout=1) # 1s timeout
-                  get(timeout=None) # No timeout
-                  get() # Default timeout behavior, which may not be statically known
-                  at the method definition.
-                  ```
+            type: array
+            items:
+              type: string
+              enum:
+                - web_search_call.action.sources
+                - code_interpreter_call.outputs
+                - computer_call_output.output.image_url
+                - file_search_call.results
+                - message.input_image.image_url
+                - message.output_text.logprobs
+                - reasoning.encrypted_content
+              title: ConversationItemInclude
+              description: >-
+                Specify additional output data to include in the model response.
         - name: limit
           in: query
           description: >-
             A limit on the number of objects to be returned (1-100, default 20).
-          required: true
+          required: false
           schema:
-            oneOf:
-              - type: integer
-              - type: object
-                title: NotGiven
-                description: >-
-                  A sentinel singleton class used to distinguish omitted keyword arguments
-                  from those passed in with the value None (which may have different
-                  behavior).
-                  For example:
-                  ```py
-                  def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response:
-                  ...
-                  get(timeout=1) # 1s timeout
-                  get(timeout=None) # No timeout
-                  get() # Default timeout behavior, which may not be statically known
-                  at the method definition.
-                  ```
+            type: integer
         - name: order
           in: query
           description: >-
             The order to return items in (asc or desc, default desc).
-          required: true
+          required: false
           schema:
-            oneOf:
-              - type: string
-                enum:
-                  - asc
-                  - desc
-              - type: object
-                title: NotGiven
-                description: >-
-                  A sentinel singleton class used to distinguish omitted keyword arguments
-                  from those passed in with the value None (which may have different
-                  behavior).
-                  For example:
-                  ```py
-                  def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response:
-                  ...
-                  get(timeout=1) # 1s timeout
-                  get(timeout=None) # No timeout
-                  get() # Default timeout behavior, which may not be statically known
-                  at the method definition.
-                  ```
+            type: string
+            enum:
+              - asc
+              - desc
       deprecated: false
     post:
       responses:
@@ -6482,6 +6382,7 @@ components:
           enum:
             - llm
             - embedding
+            - rerank
           title: ModelType
           description: >-
             Enumeration of supported model types in Llama Stack.
@@ -13585,13 +13486,16 @@ tags:
       embeddings.
-      This API provides the raw interface to the underlying models. Two kinds of models
-      are supported:
+      This API provides the raw interface to the underlying models. Three kinds of
+      models are supported:
       - LLM models: these models generate "raw" and "chat" (conversational) completions.
       - Embedding models: these models generate embeddings to be used for semantic
      search.
+      - Rerank models: these models reorder the documents based on their relevance
+      to a query.
     x-displayName: Inference
   - name: Inspect
     description: >-


@@ -4,11 +4,9 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.
+from enum import StrEnum
 from typing import Annotated, Literal, Protocol, runtime_checkable

-from openai import NOT_GIVEN
-from openai._types import NotGiven
-from openai.types.responses.response_includable import ResponseIncludable
 from pydantic import BaseModel, Field

 from llama_stack.apis.agents.openai_responses import (
@@ -150,6 +148,20 @@ class ConversationItemCreateRequest(BaseModel):
     )

+class ConversationItemInclude(StrEnum):
+    """
+    Specify additional output data to include in the model response.
+    """
+
+    web_search_call_action_sources = "web_search_call.action.sources"
+    code_interpreter_call_outputs = "code_interpreter_call.outputs"
+    computer_call_output_output_image_url = "computer_call_output.output.image_url"
+    file_search_call_results = "file_search_call.results"
+    message_input_image_image_url = "message.input_image.image_url"
+    message_output_text_logprobs = "message.output_text.logprobs"
+    reasoning_encrypted_content = "reasoning.encrypted_content"
+
 @json_schema_type
 class ConversationItemList(BaseModel):
     """List of conversation items with pagination."""
@@ -250,13 +262,13 @@ class Conversations(Protocol):
         ...

     @webmethod(route="/conversations/{conversation_id}/items", method="GET", level=LLAMA_STACK_API_V1)
-    async def list(
+    async def list_items(
         self,
         conversation_id: str,
-        after: str | NotGiven = NOT_GIVEN,
-        include: list[ResponseIncludable] | NotGiven = NOT_GIVEN,
-        limit: int | NotGiven = NOT_GIVEN,
-        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
+        after: str | None = None,
+        include: list[ConversationItemInclude] | None = None,
+        limit: int | None = None,
+        order: Literal["asc", "desc"] | None = None,
     ) -> ConversationItemList:
         """List items.


@@ -1234,9 +1234,10 @@ class Inference(InferenceProvider):
     Llama Stack Inference API for generating completions, chat completions, and embeddings.

-    This API provides the raw interface to the underlying models. Two kinds of models are supported:
+    This API provides the raw interface to the underlying models. Three kinds of models are supported:
     - LLM models: these models generate "raw" and "chat" (conversational) completions.
     - Embedding models: these models generate embeddings to be used for semantic search.
+    - Rerank models: these models reorder the documents based on their relevance to a query.
     """

     @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)


@@ -27,10 +27,12 @@ class ModelType(StrEnum):
     """Enumeration of supported model types in Llama Stack.

     :cvar llm: Large language model for text generation and completion
     :cvar embedding: Embedding model for converting text to vector representations
+    :cvar rerank: Reranking model for reordering documents based on their relevance to a query
     """

     llm = "llm"
     embedding = "embedding"
+    rerank = "rerank"

 @json_schema_type
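
A small sketch of describing a provider model with the new type, using the `Model` constructor arguments that appear elsewhere in this diff; the provider and model IDs are placeholders.

```python
from llama_stack.apis.models import Model, ModelType

# Hypothetical rerank model entry; IDs are illustrative only.
rerank_model = Model(
    provider_id="example-provider",
    provider_resource_id="example-rerank-v1",
    identifier="example-rerank-v1",
    model_type=ModelType.rerank,  # new enum member introduced in this change
)
```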


@@ -6,9 +6,8 @@
 import secrets
 import time
-from typing import Any
+from typing import Any, Literal

-from openai import NOT_GIVEN
 from pydantic import BaseModel, TypeAdapter

 from llama_stack.apis.conversations.conversations import (
@@ -16,6 +15,7 @@ from llama_stack.apis.conversations.conversations import (
     ConversationDeletedResource,
     ConversationItem,
     ConversationItemDeletedResource,
+    ConversationItemInclude,
     ConversationItemList,
     Conversations,
     Metadata,
@@ -247,7 +247,14 @@ class ConversationServiceImpl(Conversations):
         adapter: TypeAdapter[ConversationItem] = TypeAdapter(ConversationItem)
         return adapter.validate_python(record["item_data"])

-    async def list(self, conversation_id: str, after=NOT_GIVEN, include=NOT_GIVEN, limit=NOT_GIVEN, order=NOT_GIVEN):
+    async def list_items(
+        self,
+        conversation_id: str,
+        after: str | None = None,
+        include: list[ConversationItemInclude] | None = None,
+        limit: int | None = None,
+        order: Literal["asc", "desc"] | None = None,
+    ) -> ConversationItemList:
         """List items in the conversation."""
         if not conversation_id:
             raise ValueError(f"Expected a non-empty value for `conversation_id` but received {conversation_id!r}")
@@ -258,14 +265,12 @@ class ConversationServiceImpl(Conversations):
         result = await self.sql_store.fetch_all(table="conversation_items", where={"conversation_id": conversation_id})
         records = result.data

-        if order != NOT_GIVEN and order == "asc":
+        if order is not None and order == "asc":
             records.sort(key=lambda x: x["created_at"])
         else:
             records.sort(key=lambda x: x["created_at"], reverse=True)

-        actual_limit = 20
-        if limit != NOT_GIVEN and isinstance(limit, int):
-            actual_limit = limit
+        actual_limit = limit or 20
         records = records[:actual_limit]

         items = [record["item_data"] for record in records]


@@ -44,9 +44,14 @@ from llama_stack.apis.inference import (
     OpenAIEmbeddingsResponse,
     OpenAIMessageParam,
     Order,
+    RerankResponse,
     StopReason,
     ToolPromptFormat,
 )
+from llama_stack.apis.inference.inference import (
+    OpenAIChatCompletionContentPartImageParam,
+    OpenAIChatCompletionContentPartTextParam,
+)
 from llama_stack.apis.models import Model, ModelType
 from llama_stack.apis.telemetry import MetricEvent, MetricInResponse, Telemetry
 from llama_stack.log import get_logger
@@ -182,6 +187,23 @@ class InferenceRouter(Inference):
             raise ModelTypeError(model_id, model.model_type, expected_model_type)
         return model

+    async def rerank(
+        self,
+        model: str,
+        query: str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam,
+        items: list[str | OpenAIChatCompletionContentPartTextParam | OpenAIChatCompletionContentPartImageParam],
+        max_num_results: int | None = None,
+    ) -> RerankResponse:
+        logger.debug(f"InferenceRouter.rerank: {model}")
+        model_obj = await self._get_model(model, ModelType.rerank)
+        provider = await self.routing_table.get_provider_impl(model_obj.identifier)
+        return await provider.rerank(
+            model=model_obj.identifier,
+            query=query,
+            items=items,
+            max_num_results=max_num_results,
+        )
+
     async def openai_completion(
         self,
         params: Annotated[OpenAICompletionRequestWithExtraBody, Body(...)],
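
A minimal sketch of calling the new router method, assuming `router` is an `InferenceRouter` with a rerank model already registered; the model ID and documents are placeholders.

```python
async def rank_documents(router) -> None:
    # "example-rerank-v1" must be registered with model_type=ModelType.rerank.
    response = await router.rerank(
        model="example-rerank-v1",
        query="What is the capital of France?",
        items=[
            "Paris is the capital and largest city of France.",
            "Berlin is the capital of Germany.",
            "France is known for its wine regions.",
        ],
        max_num_results=2,  # keep only the two most relevant documents
    )
    # RerankResponse carries the reranked results; its exact fields are defined elsewhere.
    print(response)
```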


@@ -65,12 +65,16 @@ class SafetyRouter(Safety):
             """Get Shield id from model (provider_resource_id) of shield."""
             list_shields_response = await self.routing_table.list_shields()

-            matches = [s.identifier for s in list_shields_response.data if model == s.provider_resource_id]
+            matches: list[str] = [s.identifier for s in list_shields_response.data if model == s.provider_resource_id]
             if not matches:
-                raise ValueError(f"No shield associated with provider_resource id {model}")
+                raise ValueError(
+                    f"No shield associated with provider_resource id {model}: choose from {[s.provider_resource_id for s in list_shields_response.data]}"
+                )
             if len(matches) > 1:
-                raise ValueError(f"Multiple shields associated with provider_resource id {model}")
+                raise ValueError(
+                    f"Multiple shields associated with provider_resource id {model}: matched shields {matches}"
+                )
             return matches[0]

         shield_id = await get_shield_id(self, model)
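
A self-contained sketch of the lookup rule these messages describe, using a hypothetical in-memory shield list instead of the real routing table; names are illustrative.

```python
from dataclasses import dataclass

@dataclass
class Shield:
    identifier: str
    provider_resource_id: str

def get_shield_id(shields: list[Shield], model: str) -> str:
    # Same matching rule: match on provider_resource_id and require exactly one hit.
    matches = [s.identifier for s in shields if model == s.provider_resource_id]
    if not matches:
        raise ValueError(
            f"No shield associated with provider_resource id {model}: "
            f"choose from {[s.provider_resource_id for s in shields]}"
        )
    if len(matches) > 1:
        raise ValueError(f"Multiple shields associated with provider_resource id {model}: matched shields {matches}")
    return matches[0]

shields = [Shield("llama-guard", "meta-llama/Llama-Guard-3-8B")]  # placeholder entry
print(get_shield_id(shields, "meta-llama/Llama-Guard-3-8B"))       # -> "llama-guard"
```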


@@ -137,7 +137,8 @@ class CustomRichHandler(RichHandler):
         # Set a reasonable default width for console output, especially when redirected to files
         console_width = int(os.environ.get("LLAMA_STACK_LOG_WIDTH", "120"))
         # Don't force terminal codes to avoid ANSI escape codes in log files
-        kwargs["console"] = Console(width=console_width)
+        # Ensure logs go to stderr, not stdout
+        kwargs["console"] = Console(width=console_width, stderr=True)
         super().__init__(*args, **kwargs)

     def emit(self, record):
@@ -177,6 +178,7 @@ def setup_logging(category_levels: dict[str, int] | None = None, log_file: str |
         log_file (str | None): Path to a log file to additionally pipe the logs into.
             If None, reads from LLAMA_STACK_LOG_FILE environment variable.
     """
+    global _category_levels
     # Read from environment variables if not explicitly provided
     if category_levels is None:
         category_levels = dict.fromkeys(CATEGORIES, DEFAULT_LOG_LEVEL)
@@ -184,6 +186,9 @@ def setup_logging(category_levels: dict[str, int] | None = None, log_file: str |
         if env_config:
             category_levels.update(parse_environment_config(env_config))

+    # Update the module-level _category_levels so that already-created loggers pick up the new levels
+    _category_levels.update(category_levels)
+
     if log_file is None:
         log_file = os.environ.get("LLAMA_STACK_LOG_FILE")

     log_format = "%(asctime)s %(name)s:%(lineno)d %(category)s: %(message)s"
@@ -268,13 +273,17 @@ def setup_logging(category_levels: dict[str, int] | None = None, log_file: str |
     }
     dictConfig(logging_config)

-    # Ensure third-party libraries follow the root log level, but preserve
-    # already-configured loggers (e.g., uvicorn) and our own llama_stack loggers
+    # Update log levels for all loggers that were created before setup_logging was called
     for name, logger in logging.root.manager.loggerDict.items():
         if isinstance(logger, logging.Logger):
-            # Skip infrastructure loggers (uvicorn, fastapi) and our own loggers
-            if name.startswith(("uvicorn", "fastapi", "llama_stack")):
+            # Skip infrastructure loggers (uvicorn, fastapi) to preserve their configured levels
+            if name.startswith(("uvicorn", "fastapi")):
                 continue
+            # Update llama_stack loggers if root level was explicitly set (e.g., via all=CRITICAL)
+            if name.startswith("llama_stack") and "root" in category_levels:
+                logger.setLevel(root_level)
+            # Update third-party library loggers
+            elif not name.startswith("llama_stack"):
                 logger.setLevel(root_level)
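
A small sketch of driving these knobs, using the `setup_logging` signature and the `LLAMA_STACK_LOG_WIDTH` / `LLAMA_STACK_LOG_FILE` environment variables shown above; the import path and the `"root"` category key are assumptions based on the surrounding code and comments.

```python
import logging
import os

from llama_stack.log import setup_logging  # hypothetical import location

os.environ["LLAMA_STACK_LOG_WIDTH"] = "100"                  # console width used by CustomRichHandler
os.environ["LLAMA_STACK_LOG_FILE"] = "/tmp/llama_stack.log"  # optional file sink

# Quiet everything: with the patched loop, pre-existing llama_stack loggers
# are also lowered when the root level is set explicitly.
setup_logging(category_levels={"root": logging.CRITICAL})
```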


@@ -131,7 +131,7 @@ class OpenAIResponsesImpl:
             tool_context.recover_tools_from_previous_response(previous_response)
         elif conversation is not None:
-            conversation_items = await self.conversations_api.list(conversation, order="asc")
+            conversation_items = await self.conversations_api.list_items(conversation, order="asc")

             # Use stored messages as source of truth (like previous_response.messages)
             stored_messages = await self.responses_store.get_conversation_messages(conversation)


@@ -48,6 +48,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
     - overwrite_completion_id: If True, overwrites the 'id' field in OpenAI responses
     - download_images: If True, downloads images and converts to base64 for providers that require it
     - embedding_model_metadata: A dictionary mapping model IDs to their embedding metadata
+    - construct_model_from_identifier: Method to construct a Model instance corresponding to the given identifier
     - provider_data_api_key_field: Optional field name in provider data to look for API key
     - list_provider_model_ids: Method to list available models from the provider
     - get_extra_client_params: Method to provide extra parameters to the AsyncOpenAI client
@@ -121,6 +122,30 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
         """
         return {}

+    def construct_model_from_identifier(self, identifier: str) -> Model:
+        """
+        Construct a Model instance corresponding to the given identifier
+
+        Child classes can override this to customize model typing/metadata.
+
+        :param identifier: The provider's model identifier
+        :return: A Model instance
+        """
+        if metadata := self.embedding_model_metadata.get(identifier):
+            return Model(
+                provider_id=self.__provider_id__,  # type: ignore[attr-defined]
+                provider_resource_id=identifier,
+                identifier=identifier,
+                model_type=ModelType.embedding,
+                metadata=metadata,
+            )
+        return Model(
+            provider_id=self.__provider_id__,  # type: ignore[attr-defined]
+            provider_resource_id=identifier,
+            identifier=identifier,
+            model_type=ModelType.llm,
+        )
+
     async def list_provider_model_ids(self) -> Iterable[str]:
         """
         List available models from the provider.
@@ -416,21 +441,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
             if self.allowed_models and provider_model_id not in self.allowed_models:
                 logger.info(f"Skipping model {provider_model_id} as it is not in the allowed models list")
                 continue
-            if metadata := self.embedding_model_metadata.get(provider_model_id):
-                model = Model(
-                    provider_id=self.__provider_id__,  # type: ignore[attr-defined]
-                    provider_resource_id=provider_model_id,
-                    identifier=provider_model_id,
-                    model_type=ModelType.embedding,
-                    metadata=metadata,
-                )
-            else:
-                model = Model(
-                    provider_id=self.__provider_id__,  # type: ignore[attr-defined]
-                    provider_resource_id=provider_model_id,
-                    identifier=provider_model_id,
-                    model_type=ModelType.llm,
-                )
+            model = self.construct_model_from_identifier(provider_model_id)
             self._model_cache[provider_model_id] = model

         return list(self._model_cache.values())


@@ -82,7 +82,7 @@ async def test_conversation_items(service):
     assert len(item_list.data) == 1
     assert item_list.data[0].id == "msg_test123"

-    items = await service.list(conversation.id)
+    items = await service.list_items(conversation.id)
     assert len(items.data) == 1
@@ -120,7 +120,7 @@ async def test_openai_type_compatibility(service):
         assert hasattr(item_list, attr)
     assert item_list.object == "list"

-    items = await service.list(conversation.id)
+    items = await service.list_items(conversation.id)
     item = await service.retrieve(conversation.id, items.data[0].id)
     item_dict = item.model_dump()


@@ -62,7 +62,7 @@ class TestConversationValidation:
         conv_id = "conv_nonexistent"

         # Mock conversation not found
-        mock_conversations_api.list.side_effect = ConversationNotFoundError("conv_nonexistent")
+        mock_conversations_api.list_items.side_effect = ConversationNotFoundError("conv_nonexistent")

         with pytest.raises(ConversationNotFoundError):
             await responses_impl_with_conversations.create_openai_response(
@@ -160,7 +160,7 @@ class TestIntegrationWorkflow:
         self, responses_impl_with_conversations, mock_conversations_api
     ):
         """Test creating a response with a valid conversation parameter."""
-        mock_conversations_api.list.return_value = ConversationItemList(
+        mock_conversations_api.list_items.return_value = ConversationItemList(
             data=[], first_id=None, has_more=False, last_id=None, object="list"
         )
@@ -227,7 +227,7 @@ class TestIntegrationWorkflow:
         self, responses_impl_with_conversations, mock_conversations_api
     ):
         """Test creating a response with a non-existent conversation."""
-        mock_conversations_api.list.side_effect = ConversationNotFoundError("conv_nonexistent")
+        mock_conversations_api.list_items.side_effect = ConversationNotFoundError("conv_nonexistent")

         with pytest.raises(ConversationNotFoundError) as exc_info:
             await responses_impl_with_conversations.create_openai_response(


@@ -38,6 +38,28 @@ class OpenAIMixinWithEmbeddingsImpl(OpenAIMixinImpl):
     }

+class OpenAIMixinWithCustomModelConstruction(OpenAIMixinImpl):
+    """Test implementation that uses construct_model_from_identifier to add rerank models"""
+
+    embedding_model_metadata: dict[str, dict[str, int]] = {
+        "text-embedding-3-small": {"embedding_dimension": 1536, "context_length": 8192},
+        "text-embedding-ada-002": {"embedding_dimension": 1536, "context_length": 8192},
+    }
+
+    # Adds rerank models via construct_model_from_identifier
+    rerank_model_ids: set[str] = {"rerank-model-1", "rerank-model-2"}
+
+    def construct_model_from_identifier(self, identifier: str) -> Model:
+        if identifier in self.rerank_model_ids:
+            return Model(
+                provider_id=self.__provider_id__,  # type: ignore[attr-defined]
+                provider_resource_id=identifier,
+                identifier=identifier,
+                model_type=ModelType.rerank,
+            )
+        return super().construct_model_from_identifier(identifier)
+
 @pytest.fixture
 def mixin():
     """Create a test instance of OpenAIMixin with mocked model_store"""
@@ -62,6 +84,13 @@ def mixin_with_embeddings():
     return OpenAIMixinWithEmbeddingsImpl(config=config)

+@pytest.fixture
+def mixin_with_custom_model_construction():
+    """Create a test instance using custom construct_model_from_identifier"""
+    config = RemoteInferenceProviderConfig()
+    return OpenAIMixinWithCustomModelConstruction(config=config)
+
 @pytest.fixture
 def mock_models():
     """Create multiple mock OpenAI model objects"""
@@ -113,6 +142,19 @@ def mock_client_context():
     return _mock_client_context

+def _assert_models_match_expected(actual_models, expected_models):
+    """Verify the models match expected attributes.
+
+    Args:
+        actual_models: List of models to verify
+        expected_models: Mapping of model identifier to expected attribute values
+    """
+    for identifier, expected_attrs in expected_models.items():
+        model = next(m for m in actual_models if m.identifier == identifier)
+        for attr_name, expected_value in expected_attrs.items():
+            assert getattr(model, attr_name) == expected_value
+
 class TestOpenAIMixinListModels:
     """Test cases for the list_models method"""
@@ -342,21 +384,71 @@ class TestOpenAIMixinEmbeddingModelMetadata:
             assert result is not None
             assert len(result) == 2

-            # Find the models in the result
-            embedding_model = next(m for m in result if m.identifier == "text-embedding-3-small")
-            llm_model = next(m for m in result if m.identifier == "gpt-4")
-
-            # Check embedding model
-            assert embedding_model.model_type == ModelType.embedding
-            assert embedding_model.metadata == {"embedding_dimension": 1536, "context_length": 8192}
-            assert embedding_model.provider_id == "test-provider"
-            assert embedding_model.provider_resource_id == "text-embedding-3-small"
-
-            # Check LLM model
-            assert llm_model.model_type == ModelType.llm
-            assert llm_model.metadata == {}  # No metadata for LLMs
-            assert llm_model.provider_id == "test-provider"
-            assert llm_model.provider_resource_id == "gpt-4"
+            expected_models = {
+                "text-embedding-3-small": {
+                    "model_type": ModelType.embedding,
+                    "metadata": {"embedding_dimension": 1536, "context_length": 8192},
+                    "provider_id": "test-provider",
+                    "provider_resource_id": "text-embedding-3-small",
+                },
+                "gpt-4": {
+                    "model_type": ModelType.llm,
+                    "metadata": {},
+                    "provider_id": "test-provider",
+                    "provider_resource_id": "gpt-4",
+                },
+            }
+
+            _assert_models_match_expected(result, expected_models)
+
+
+class TestOpenAIMixinCustomModelConstruction:
+    """Test cases for mixed model types (LLM, embedding, rerank) through construct_model_from_identifier"""
+
+    async def test_mixed_model_types_identification(self, mixin_with_custom_model_construction, mock_client_context):
+        """Test that LLM, embedding, and rerank models are correctly identified with proper types and metadata"""
+        # Create mock models: 1 embedding, 1 rerank, 1 LLM
+        mock_embedding_model = MagicMock(id="text-embedding-3-small")
+        mock_rerank_model = MagicMock(id="rerank-model-1")
+        mock_llm_model = MagicMock(id="gpt-4")
+        mock_models = [mock_embedding_model, mock_rerank_model, mock_llm_model]
+
+        mock_client = MagicMock()
+
+        async def mock_models_list():
+            for model in mock_models:
+                yield model
+
+        mock_client.models.list.return_value = mock_models_list()
+
+        with mock_client_context(mixin_with_custom_model_construction, mock_client):
+            result = await mixin_with_custom_model_construction.list_models()
+
+            assert result is not None
+            assert len(result) == 3
+
+            expected_models = {
+                "text-embedding-3-small": {
+                    "model_type": ModelType.embedding,
+                    "metadata": {"embedding_dimension": 1536, "context_length": 8192},
+                    "provider_id": "test-provider",
+                    "provider_resource_id": "text-embedding-3-small",
+                },
+                "rerank-model-1": {
+                    "model_type": ModelType.rerank,
+                    "metadata": {},
+                    "provider_id": "test-provider",
+                    "provider_resource_id": "rerank-model-1",
+                },
+                "gpt-4": {
+                    "model_type": ModelType.llm,
+                    "metadata": {},
+                    "provider_id": "test-provider",
+                    "provider_resource_id": "gpt-4",
+                },
+            }
+
+            _assert_models_match_expected(result, expected_models)

 class TestOpenAIMixinAllowedModels:

uv.lock (generated)

@@ -2661,7 +2661,7 @@ wheels = [

 [[package]]
 name = "openai"
-version = "1.107.0"
+version = "2.5.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
     { name = "anyio" },
@@ -2673,9 +2673,9 @@ dependencies = [
     { name = "tqdm" },
     { name = "typing-extensions" },
 ]
-sdist = { url = "https://files.pythonhosted.org/packages/88/67/d6498de300f83ff57a79cb7aa96ef3bef8d6f070c3ded0f1b5b45442a6bc/openai-1.107.0.tar.gz", hash = "sha256:43e04927584e57d0e9e640ee0077c78baf8150098be96ebd5c512539b6c4e9a4", size = 566056, upload-time = "2025-09-08T19:25:47.604Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/72/39/aa3767c920c217ef56f27e89cbe3aaa43dd6eea3269c95f045c5761b9df1/openai-2.5.0.tar.gz", hash = "sha256:f8fa7611f96886a0f31ac6b97e58bc0ada494b255ee2cfd51c8eb502cfcb4814", size = 590333, upload-time = "2025-10-17T18:14:47.669Z" }
 wheels = [
-    { url = "https://files.pythonhosted.org/packages/91/ed/e8a4fd20390f2858b95227c288df8fe0c835f7c77625f7583609161684ba/openai-1.107.0-py3-none-any.whl", hash = "sha256:3dcfa3cbb116bd6924b27913b8da28c4a787379ff60049588547a1013e6d6438", size = 950968, upload-time = "2025-09-08T19:25:45.552Z" },
+    { url = "https://files.pythonhosted.org/packages/14/f3/ebbd700d8dc1e6380a7a382969d96bc0cbea8717b52fb38ff0ca2a7653e8/openai-2.5.0-py3-none-any.whl", hash = "sha256:21380e5f52a71666dbadbf322dd518bdf2b9d11ed0bb3f96bea17310302d6280", size = 999851, upload-time = "2025-10-17T18:14:45.528Z" },
 ]

 [[package]]