feat(openai_movement)!: Change URL structures to kill /openai/v1 (part 2) (#3605)

This commit is contained in:
Ashwin Bharambe 2025-09-29 22:57:37 -07:00 committed by GitHub
parent 3a09f00cdb
commit 56b625d18a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 3 additions and 2834 deletions

View file

@ -7,7 +7,7 @@ sidebar_position: 1
### Server path
Llama Stack exposes an OpenAI-compatible API endpoint at `/v1/openai/v1`. So, for a Llama Stack server running locally on port `8321`, the full url to the OpenAI-compatible API endpoint is `http://localhost:8321/v1/openai/v1`.
Llama Stack exposes OpenAI-compatible API endpoints at `/v1`. So, for a Llama Stack server running locally on port `8321`, the full url to the OpenAI-compatible API endpoint is `http://localhost:8321/v1`.
### Clients
@ -25,12 +25,12 @@ client = LlamaStackClient(base_url="http://localhost:8321")
#### OpenAI Client
When using an OpenAI client, set the `base_url` to the `/v1/openai/v1` path on your Llama Stack server.
When using an OpenAI client, set the `base_url` to the `/v1` path on your Llama Stack server.
```python
from openai import OpenAI
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")
```
Regardless of the client you choose, the following code examples should all work the same.

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -694,7 +694,6 @@ class Agents(Protocol):
#
# Both of these APIs are inherently stateful.
@webmethod(route="/openai/v1/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
@webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_openai_response(
self,
@ -707,7 +706,6 @@ class Agents(Protocol):
"""
...
@webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1)
@webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
async def create_openai_response(
self,
@ -733,7 +731,6 @@ class Agents(Protocol):
"""
...
@webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1)
@webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
async def list_openai_responses(
self,
@ -752,7 +749,6 @@ class Agents(Protocol):
"""
...
@webmethod(route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
@webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
async def list_openai_response_input_items(
self,
@ -775,7 +771,6 @@ class Agents(Protocol):
"""
...
@webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
@webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
"""Delete an OpenAI response by its ID.

View file

@ -43,7 +43,6 @@ class Batches(Protocol):
Note: This API is currently under active development and may undergo changes.
"""
@webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1)
@webmethod(route="/batches", method="POST", level=LLAMA_STACK_API_V1)
async def create_batch(
self,
@ -64,7 +63,6 @@ class Batches(Protocol):
"""
...
@webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
@webmethod(route="/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
async def retrieve_batch(self, batch_id: str) -> BatchObject:
"""Retrieve information about a specific batch.
@ -74,7 +72,6 @@ class Batches(Protocol):
"""
...
@webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
@webmethod(route="/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
async def cancel_batch(self, batch_id: str) -> BatchObject:
"""Cancel a batch that is in progress.
@ -84,7 +81,6 @@ class Batches(Protocol):
"""
...
@webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1)
@webmethod(route="/batches", method="GET", level=LLAMA_STACK_API_V1)
async def list_batches(
self,

View file

@ -105,7 +105,6 @@ class OpenAIFileDeleteResponse(BaseModel):
@trace_protocol
class Files(Protocol):
# OpenAI Files API Endpoints
@webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1)
@webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)
async def openai_upload_file(
self,
@ -128,7 +127,6 @@ class Files(Protocol):
"""
...
@webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1)
@webmethod(route="/files", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_files(
self,
@ -148,7 +146,6 @@ class Files(Protocol):
"""
...
@webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
@webmethod(route="/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
async def openai_retrieve_file(
self,
@ -162,7 +159,6 @@ class Files(Protocol):
"""
...
@webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
@webmethod(route="/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def openai_delete_file(
self,
@ -176,7 +172,6 @@ class Files(Protocol):
"""
...
@webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
@webmethod(route="/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
async def openai_retrieve_file_content(
self,

View file

@ -1089,7 +1089,6 @@ class InferenceProvider(Protocol):
raise NotImplementedError("Reranking is not implemented")
return # this is so mypy's safe-super rule will consider the method concrete
@webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1)
@webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1)
async def openai_completion(
self,
@ -1141,7 +1140,6 @@ class InferenceProvider(Protocol):
"""
...
@webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
@webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
async def openai_chat_completion(
self,
@ -1198,7 +1196,6 @@ class InferenceProvider(Protocol):
"""
...
@webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1)
@webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1)
async def openai_embeddings(
self,
@ -1228,7 +1225,6 @@ class Inference(InferenceProvider):
- Embedding models: these models generate embeddings to be used for semantic search.
"""
@webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
@webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
async def list_chat_completions(
self,
@ -1247,7 +1243,6 @@ class Inference(InferenceProvider):
"""
raise NotImplementedError("List chat completions is not implemented")
@webmethod(route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
@webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
"""Describe a chat completion by its ID.

View file

@ -111,14 +111,6 @@ class Models(Protocol):
"""
...
@webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_models(self) -> OpenAIListModelsResponse:
"""List models using the OpenAI API.
:returns: A OpenAIListModelsResponse.
"""
...
@webmethod(route="/models/{model_id:path}", method="GET", level=LLAMA_STACK_API_V1)
async def get_model(
self,

View file

@ -114,7 +114,6 @@ class Safety(Protocol):
"""
...
@webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1)
@webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
async def run_moderation(self, input: str | list[str], model: str) -> ModerationObject:
"""Classifies if text and/or image inputs are potentially harmful.

View file

@ -473,7 +473,6 @@ class VectorIO(Protocol):
...
# OpenAI Vector Stores API endpoints
@webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
@webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
async def openai_create_vector_store(
self,
@ -500,7 +499,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
@webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_vector_stores(
self,
@ -519,7 +517,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
@webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
async def openai_retrieve_vector_store(
self,
@ -532,7 +529,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1)
@webmethod(route="/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1)
async def openai_update_vector_store(
self,
@ -551,7 +547,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1)
@webmethod(route="/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def openai_delete_vector_store(
self,
@ -564,7 +559,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1)
@webmethod(route="/vector_stores/{vector_store_id}/search", method="POST", level=LLAMA_STACK_API_V1)
async def openai_search_vector_store(
self,
@ -591,7 +585,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1)
@webmethod(route="/vector_stores/{vector_store_id}/files", method="POST", level=LLAMA_STACK_API_V1)
async def openai_attach_file_to_vector_store(
self,
@ -610,7 +603,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(route="/openai/v1/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1)
@webmethod(route="/vector_stores/{vector_store_id}/files", method="GET", level=LLAMA_STACK_API_V1)
async def openai_list_files_in_vector_store(
self,
@ -633,9 +625,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1
)
@webmethod(route="/vector_stores/{vector_store_id}/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
async def openai_retrieve_vector_store_file(
self,
@ -650,11 +639,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
method="GET",
level=LLAMA_STACK_API_V1,
)
@webmethod(
route="/vector_stores/{vector_store_id}/files/{file_id}/content",
method="GET",
@ -673,9 +657,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1
)
@webmethod(route="/vector_stores/{vector_store_id}/files/{file_id}", method="POST", level=LLAMA_STACK_API_V1)
async def openai_update_vector_store_file(
self,
@ -692,9 +673,6 @@ class VectorIO(Protocol):
"""
...
@webmethod(
route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1
)
@webmethod(route="/vector_stores/{vector_store_id}/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def openai_delete_vector_store_file(
self,