mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-25 02:34:29 +00:00
* fix(generic_api_callback.py): remove requests lib usage * fix(budget_manager.py): remove requests lib usgae * fix(main.py): cleanup requests lib usage * fix(utils.py): remove requests lib usage * fix(argilla.py): fix argilla test * fix(athina.py): replace 'requests' lib usage with litellm module * fix(greenscale.py): replace 'requests' lib usage with httpx * fix: remove unused 'requests' lib import + replace usage in some places * fix(prompt_layer.py): remove 'requests' lib usage from prompt layer * fix(ollama_chat.py): remove 'requests' lib usage * fix(baseten.py): replace 'requests' lib usage * fix(codestral/): replace 'requests' lib usage * fix(predibase/): replace 'requests' lib usage * refactor: cleanup unused 'requests' lib imports * fix(oobabooga.py): cleanup 'requests' lib usage * fix(invoke_handler.py): remove unused 'requests' lib usage * refactor: cleanup unused 'requests' lib import * fix: fix linting errors * refactor(ollama/): move ollama to using base llm http handler removes 'requests' lib dep for ollama integration * fix(ollama_chat.py): fix linting errors * fix(ollama/completion/transformation.py): convert non-jpeg/png image to jpeg/png before passing to ollama
197 lines
5.2 KiB
Python
197 lines
5.2 KiB
Python
# What is this?
|
|
## Handler file for a Custom Chat LLM
|
|
|
|
"""
|
|
- completion
|
|
- acompletion
|
|
- streaming
|
|
- async_streaming
|
|
"""
|
|
|
|
import copy
|
|
import json
|
|
import os
|
|
import time
|
|
import types
|
|
from enum import Enum
|
|
from functools import partial
|
|
from typing import (
|
|
Any,
|
|
AsyncGenerator,
|
|
AsyncIterator,
|
|
Callable,
|
|
Coroutine,
|
|
Iterator,
|
|
List,
|
|
Literal,
|
|
Optional,
|
|
Tuple,
|
|
Union,
|
|
)
|
|
|
|
import httpx
|
|
|
|
import litellm
|
|
from litellm.litellm_core_utils.core_helpers import map_finish_reason
|
|
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
|
|
from litellm.types.utils import GenericStreamingChunk, ProviderField
|
|
from litellm.utils import (
|
|
CustomStreamWrapper,
|
|
EmbeddingResponse,
|
|
ImageResponse,
|
|
ModelResponse,
|
|
Usage,
|
|
)
|
|
|
|
from .base import BaseLLM
|
|
|
|
|
|
class CustomLLMError(Exception):  # use this for all your exceptions
    """Exception carrying a status code alongside its message.

    Attributes:
        status_code: the status code supplied by the raiser.
        message: the error text; also passed to ``Exception`` so that
            ``str(err)`` and ``err.args`` reflect it.
    """

    def __init__(self, status_code, message):
        # Initialise the base exception first so ``args`` holds the message.
        super().__init__(message)
        self.message = message
        self.status_code = status_code
|
|
|
|
|
|
class CustomLLM(BaseLLM):
    """Handler base class for a custom chat LLM.

    Every handler defined here (completion, streaming, acompletion,
    astreaming, image_generation, aimage_generation) is a placeholder that
    raises ``CustomLLMError`` with status 500 and "Not implemented yet!".
    NOTE(review): subclasses are presumably expected to override the call
    types they support -- confirm against the calling code.
    """

    def __init__(self) -> None:
        super().__init__()

    @staticmethod
    def _not_implemented_error() -> CustomLLMError:
        """Build the error raised by every placeholder handler below."""
        return CustomLLMError(status_code=500, message="Not implemented yet!")

    def completion(
        self,
        model: str,
        messages: list,
        api_base: str,
        custom_prompt_dict: dict,
        model_response: ModelResponse,
        print_verbose: Callable,
        encoding,
        api_key,
        logging_obj,
        optional_params: dict,
        acompletion=None,
        litellm_params=None,
        logger_fn=None,
        headers={},
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[HTTPHandler] = None,
    ) -> ModelResponse:
        """Synchronous, non-streaming completion.

        Raises:
            CustomLLMError: always; this base implementation is a placeholder.
        """
        # Referenced through the class so a subclass attribute of the same
        # name cannot change what the base placeholder raises.
        raise CustomLLM._not_implemented_error()

    def streaming(
        self,
        model: str,
        messages: list,
        api_base: str,
        custom_prompt_dict: dict,
        model_response: ModelResponse,
        print_verbose: Callable,
        encoding,
        api_key,
        logging_obj,
        optional_params: dict,
        acompletion=None,
        litellm_params=None,
        logger_fn=None,
        headers={},
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[HTTPHandler] = None,
    ) -> Iterator[GenericStreamingChunk]:
        """Synchronous streaming completion.

        Raises:
            CustomLLMError: always; this base implementation is a placeholder.
        """
        raise CustomLLM._not_implemented_error()

    async def acompletion(
        self,
        model: str,
        messages: list,
        api_base: str,
        custom_prompt_dict: dict,
        model_response: ModelResponse,
        print_verbose: Callable,
        encoding,
        api_key,
        logging_obj,
        optional_params: dict,
        acompletion=None,
        litellm_params=None,
        logger_fn=None,
        headers={},
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[AsyncHTTPHandler] = None,
    ) -> ModelResponse:
        """Asynchronous, non-streaming completion.

        Raises:
            CustomLLMError: always; this base implementation is a placeholder.
        """
        raise CustomLLM._not_implemented_error()

    async def astreaming(
        self,
        model: str,
        messages: list,
        api_base: str,
        custom_prompt_dict: dict,
        model_response: ModelResponse,
        print_verbose: Callable,
        encoding,
        api_key,
        logging_obj,
        optional_params: dict,
        acompletion=None,
        litellm_params=None,
        logger_fn=None,
        headers={},
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[AsyncHTTPHandler] = None,
    ) -> AsyncIterator[GenericStreamingChunk]:
        """Asynchronous streaming completion.

        Raises:
            CustomLLMError: always; this base implementation is a placeholder.
        """
        raise CustomLLM._not_implemented_error()

    def image_generation(
        self,
        model: str,
        prompt: str,
        api_key: Optional[str],
        api_base: Optional[str],
        model_response: ImageResponse,
        optional_params: dict,
        logging_obj: Any,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[HTTPHandler] = None,
    ) -> ImageResponse:
        """Synchronous image generation.

        Raises:
            CustomLLMError: always; this base implementation is a placeholder.
        """
        raise CustomLLM._not_implemented_error()

    async def aimage_generation(
        self,
        model: str,
        prompt: str,
        model_response: ImageResponse,
        # dynamically set api_key - https://docs.litellm.ai/docs/set_keys#api_key
        api_key: Optional[str],
        # dynamically set api_base - https://docs.litellm.ai/docs/set_keys#api_base
        api_base: Optional[str],
        optional_params: dict,
        logging_obj: Any,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[AsyncHTTPHandler] = None,
    ) -> ImageResponse:
        """Asynchronous image generation.

        Raises:
            CustomLLMError: always; this base implementation is a placeholder.
        """
        raise CustomLLM._not_implemented_error()
|
|
|
|
|
|
def custom_chat_llm_router(
    async_fn: bool, stream: Optional[bool], custom_llm: CustomLLM
):
    """
    Select the CustomLLM handler that matches the call type.

    Mapping (truthiness of ``async_fn`` / ``stream``):
    - async, streaming      -> ``custom_llm.astreaming``
    - async, non-streaming  -> ``custom_llm.acompletion``
    - sync, streaming       -> ``custom_llm.streaming``
    - sync, non-streaming   -> ``custom_llm.completion``

    The chosen handler is returned as-is; it is not invoked here.
    """
    if not async_fn:
        return custom_llm.streaming if stream else custom_llm.completion
    return custom_llm.astreaming if stream else custom_llm.acompletion
|