agentic loop has a RAG implementation

This commit is contained in:
Ashwin Bharambe 2024-08-23 15:20:40 -07:00
parent 77d6055d9f
commit 14637bea66
4 changed files with 245 additions and 111 deletions

View file

@ -101,6 +101,11 @@ class BatchChatCompletionResponse(BaseModel):
completion_message_batch: List[CompletionMessage]
@json_schema_type
class EmbeddingsResponse(BaseModel):
    # Result model for the `Inference.embeddings` endpoint (its declared
    # return type). Kept as `#` comments rather than a docstring so the
    # generated schema description is not affected.

    # A list of float vectors.
    # NOTE(review): presumably one inner vector per requested content item,
    # in request order — confirm against the provider implementations.
    embeddings: List[List[float]]
class Inference(Protocol):
@webmethod(route="/inference/completion")
async def completion(
@ -114,6 +119,13 @@ class Inference(Protocol):
request: ChatCompletionRequest,
) -> Union[ChatCompletionResponse, ChatCompletionResponseStreamChunk]: ...
@webmethod(route="/inference/embeddings")
async def embeddings(
    self,
    model: str,
    contents: List[InterleavedTextMedia],
) -> EmbeddingsResponse:
    # Protocol stub: declares the embeddings operation registered under the
    # "/inference/embeddings" route. Concrete behaviour comes from classes
    # that satisfy the `Inference` protocol; nothing is implemented here.
    #
    # model:    string naming the model to use
    #           (NOTE(review): presumably a model identifier — confirm).
    # contents: list of InterleavedTextMedia items to embed.
    # returns:  an EmbeddingsResponse carrying the resulting float vectors.
    ...
@webmethod(route="/inference/batch_completion")
async def batch_completion(
self,