mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 10:44:24 +00:00
161 lines
4.2 KiB
Python
161 lines
4.2 KiB
Python
# What is this?
|
|
## Handler file for a Custom Chat LLM
|
|
|
|
"""
|
|
- completion
|
|
- acompletion
|
|
- streaming
|
|
- async_streaming
|
|
"""
|
|
|
|
import copy
|
|
import json
|
|
import os
|
|
import time
|
|
import types
|
|
from enum import Enum
|
|
from functools import partial
|
|
from typing import (
|
|
Any,
|
|
AsyncGenerator,
|
|
AsyncIterator,
|
|
Callable,
|
|
Coroutine,
|
|
Iterator,
|
|
List,
|
|
Literal,
|
|
Optional,
|
|
Tuple,
|
|
Union,
|
|
)
|
|
|
|
import httpx # type: ignore
|
|
import requests # type: ignore
|
|
|
|
import litellm
|
|
from litellm.litellm_core_utils.core_helpers import map_finish_reason
|
|
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
|
from litellm.types.utils import GenericStreamingChunk, ProviderField
|
|
from litellm.utils import CustomStreamWrapper, EmbeddingResponse, ModelResponse, Usage
|
|
|
|
from .base import BaseLLM
|
|
from .prompt_templates.factory import custom_prompt, prompt_factory
|
|
|
|
|
|
class CustomLLMError(Exception):  # use this for all your exceptions
    """
    Exception carrying a status code alongside a human-readable message.

    Attributes:
        status_code: value passed by the raiser (stored as given).
        message: text passed by the raiser; also used as the exception's
            sole positional argument, so ``str(err)`` yields it.
    """

    def __init__(
        self,
        status_code,
        message,
    ):
        # Hand the message to Exception so args / str() reflect it.
        super().__init__(message)
        self.message = message
        self.status_code = status_code
|
|
|
|
|
|
class CustomLLM(BaseLLM):
    """
    Handler exposing the four call types of a custom chat LLM.

    Entry points: ``completion``, ``streaming``, ``acompletion`` and
    ``astreaming``. As written here, every one of them raises
    ``CustomLLMError`` (status 500, "Not implemented yet!") without touching
    its arguments.

    NOTE: ``headers`` defaults to a dict literal in each signature, i.e. one
    dict object created when the method is defined and reused on every call
    that omits the argument, so it should be treated as read-only.
    """

    def __init__(self) -> None:
        super().__init__()

    def completion(
        self,
        model: str,
        messages: list,
        api_base: str,
        custom_prompt_dict: dict,
        model_response: ModelResponse,
        print_verbose: Callable,
        encoding,
        api_key,
        logging_obj,
        optional_params: dict,
        acompletion=None,
        litellm_params=None,
        logger_fn=None,
        headers={},
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[HTTPHandler] = None,
    ) -> ModelResponse:
        """
        Synchronous, non-streaming call.

        Raises:
            CustomLLMError: always, with status 500.
        """
        raise CustomLLMError(message="Not implemented yet!", status_code=500)

    def streaming(
        self,
        model: str,
        messages: list,
        api_base: str,
        custom_prompt_dict: dict,
        model_response: ModelResponse,
        print_verbose: Callable,
        encoding,
        api_key,
        logging_obj,
        optional_params: dict,
        acompletion=None,
        litellm_params=None,
        logger_fn=None,
        headers={},
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[HTTPHandler] = None,
    ) -> Iterator[GenericStreamingChunk]:
        """
        Synchronous, streaming call.

        Raises:
            CustomLLMError: always, with status 500.
        """
        raise CustomLLMError(message="Not implemented yet!", status_code=500)

    async def acompletion(
        self,
        model: str,
        messages: list,
        api_base: str,
        custom_prompt_dict: dict,
        model_response: ModelResponse,
        print_verbose: Callable,
        encoding,
        api_key,
        logging_obj,
        optional_params: dict,
        acompletion=None,
        litellm_params=None,
        logger_fn=None,
        headers={},
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[AsyncHTTPHandler] = None,
    ) -> ModelResponse:
        """
        Asynchronous, non-streaming call.

        Raises:
            CustomLLMError: always (when the coroutine is awaited), with
                status 500.
        """
        raise CustomLLMError(message="Not implemented yet!", status_code=500)

    async def astreaming(
        self,
        model: str,
        messages: list,
        api_base: str,
        custom_prompt_dict: dict,
        model_response: ModelResponse,
        print_verbose: Callable,
        encoding,
        api_key,
        logging_obj,
        optional_params: dict,
        acompletion=None,
        litellm_params=None,
        logger_fn=None,
        headers={},
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[AsyncHTTPHandler] = None,
    ) -> AsyncIterator[GenericStreamingChunk]:
        """
        Asynchronous, streaming call.

        Raises:
            CustomLLMError: always (when the coroutine is awaited), with
                status 500.
        """
        raise CustomLLMError(message="Not implemented yet!", status_code=500)
|
|
|
|
|
|
def custom_chat_llm_router(
    async_fn: bool, stream: Optional[bool], custom_llm: CustomLLM
):
    """
    Select the CustomLLM entry point that matches the call type.

    Mapping (truthiness of the flags is what matters):
        async_fn, stream      -> custom_llm.astreaming
        async_fn, no stream   -> custom_llm.acompletion
        sync, stream          -> custom_llm.streaming
        sync, no stream       -> custom_llm.completion

    The attribute is returned as-is and is not called here; this function
    itself performs no validation of any response.
    """
    # Async variants share the sync names with an "a" prefix.
    prefix = "a" if async_fn else ""
    call_type = "streaming" if stream else "completion"
    # Look up only the selected attribute, nothing else on custom_llm.
    return getattr(custom_llm, prefix + call_type)
|