forked from phoenix-oss/llama-stack-mirror
		
	We desperately need to document our APIs; this is a basic requirement of having a Spec :) This PR updates the OpenAPI generator so that documentation for request parameters and object fields can be properly added to the OpenAPI specs. From there, it should get picked up by Stainless, etc. ## Test Plan: Updated client-sdk (see https://github.com/meta-llama/llama-stack-client-python/pull/104) and then ran: ```bash cd tests/client-sdk LLAMA_STACK_CONFIG=../../llama_stack/templates/fireworks/run.yaml pytest -s -v inference/test_inference.py agents/test_agents.py ```
		
			
				
	
	
		
			60 lines
		
	
	
	
		
			1.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			60 lines
		
	
	
	
		
			1.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| # Copyright (c) Meta Platforms, Inc. and affiliates.
 | |
| # All rights reserved.
 | |
| #
 | |
| # This source code is licensed under the terms described in the LICENSE file in
 | |
| # the root directory of this source tree.
 | |
| 
 | |
| from typing import List, Optional, Protocol, runtime_checkable
 | |
| 
 | |
| from llama_models.schema_utils import json_schema_type, webmethod
 | |
| from pydantic import BaseModel
 | |
| 
 | |
| from llama_stack.apis.inference import (
 | |
|     ChatCompletionResponse,
 | |
|     CompletionResponse,
 | |
|     InterleavedContent,
 | |
|     LogProbConfig,
 | |
|     Message,
 | |
|     ResponseFormat,
 | |
|     SamplingParams,
 | |
|     ToolChoice,
 | |
|     ToolDefinition,
 | |
|     ToolPromptFormat,
 | |
| )
 | |
| 
 | |
| 
 | |
@json_schema_type
class BatchCompletionResponse(BaseModel):
    """Response model wrapping a list of ``CompletionResponse`` objects."""

    # The individual completion responses that make up the batch.
    batch: List[CompletionResponse]
 | |
| 
 | |
| 
 | |
@json_schema_type
class BatchChatCompletionResponse(BaseModel):
    """Response model wrapping a list of ``ChatCompletionResponse`` objects."""

    # The individual chat completion responses that make up the batch.
    batch: List[ChatCompletionResponse]
 | |
| 
 | |
| 
 | |
@runtime_checkable
class BatchInference(Protocol):
    """Protocol describing the batch inference API.

    Declares two POST endpoints: one taking a batch of contents for
    completion and one taking a batch of message lists for chat completion.
    The method bodies are intentionally empty; implementations supply them.
    """

    @webmethod(route="/batch-inference/completion", method="POST")
    async def batch_completion(
        self,
        model: str,
        content_batch: List[InterleavedContent],
        # NOTE(review): this default instance is created once, when the class
        # body is evaluated, and is shared by every call that omits the argument.
        sampling_params: Optional[SamplingParams] = SamplingParams(),
        response_format: Optional[ResponseFormat] = None,
        logprobs: Optional[LogProbConfig] = None,
    ) -> BatchCompletionResponse:
        """Run completion over a batch of contents.

        :param model: The model to use, given as a string.
        :param content_batch: The contents to complete, as a list of InterleavedContent.
        :param sampling_params: Sampling parameters; a default SamplingParams() is used when omitted.
        :param response_format: Optional response format; None when omitted.
        :param logprobs: Optional log-probability configuration; None when omitted.
        :returns: A BatchCompletionResponse whose batch field lists CompletionResponse objects.
        """
        ...

    @webmethod(route="/batch-inference/chat-completion", method="POST")
    async def batch_chat_completion(
        self,
        model: str,
        messages_batch: List[List[Message]],
        # NOTE(review): this default instance is created once, when the class
        # body is evaluated, and is shared by every call that omits the argument.
        sampling_params: Optional[SamplingParams] = SamplingParams(),
        # zero-shot tool definitions as input to the model
        # The default is None rather than the builtin `list` type object, which
        # is not a valid value for Optional[List[ToolDefinition]].
        tools: Optional[List[ToolDefinition]] = None,
        tool_choice: Optional[ToolChoice] = ToolChoice.auto,
        tool_prompt_format: Optional[ToolPromptFormat] = None,
        response_format: Optional[ResponseFormat] = None,
        logprobs: Optional[LogProbConfig] = None,
    ) -> BatchChatCompletionResponse:
        """Run chat completion over a batch of message lists.

        :param model: The model to use, given as a string.
        :param messages_batch: The inputs, as a list in which each entry is a list of Message objects.
        :param sampling_params: Sampling parameters; a default SamplingParams() is used when omitted.
        :param tools: Optional zero-shot tool definitions given as input to the model; None when omitted.
        :param tool_choice: Optional tool choice; ToolChoice.auto when omitted.
        :param tool_prompt_format: Optional tool prompt format; None when omitted.
        :param response_format: Optional response format; None when omitted.
        :param logprobs: Optional log-probability configuration; None when omitted.
        :returns: A BatchChatCompletionResponse whose batch field lists ChatCompletionResponse objects.
        """
        ...
 |