# What is this?
## Handler file for a Custom Chat LLM

"""
- completion
- acompletion
- streaming
- async_streaming
"""

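# A custom provider is wired in by subclassing `CustomLLM` below, overriding
# only the call paths you need, and registering an instance with litellm
# (via `litellm.custom_provider_map`, per the litellm docs) so that
# "<provider>/<model>" calls route here. A sketch is at the end of this file.
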
import copy
import json
import os
import time
import types
from enum import Enum
from functools import partial
from typing import (
    Any,
    AsyncGenerator,
    AsyncIterator,
    Callable,
    Coroutine,
    Iterator,
    List,
    Literal,
    Optional,
    Tuple,
    Union,
)

import httpx  # type: ignore
import requests  # type: ignore

import litellm
from litellm.litellm_core_utils.core_helpers import map_finish_reason
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler
from litellm.types.utils import GenericStreamingChunk, ProviderField
from litellm.utils import (
    CustomStreamWrapper,
    EmbeddingResponse,
    ImageResponse,
    ModelResponse,
    Usage,
)

from .base import BaseLLM
from .prompt_templates.factory import custom_prompt, prompt_factory


class CustomLLMError(Exception):  # use this for all your exceptions
    def __init__(
        self,
        status_code,
        message,
    ):
        self.status_code = status_code
        self.message = message
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs

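# Implementations would typically catch upstream/provider errors and re-raise
# them as CustomLLMError so callers see a real HTTP status code, e.g. (a sketch,
# assuming an httpx-style `response` object):
#
#   raise CustomLLMError(status_code=response.status_code, message=response.text)
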
class CustomLLM(BaseLLM):
    """
    Base class for user-defined LLM providers.

    Subclass this and override the sync/async completion, streaming, and
    image-generation methods you need; every method below raises
    CustomLLMError until overridden.
    """

    def __init__(self) -> None:
        super().__init__()

    def completion(
        self,
        model: str,
        messages: list,
        api_base: str,
        custom_prompt_dict: dict,
        model_response: ModelResponse,
        print_verbose: Callable,
        encoding,
        api_key,
        logging_obj,
        optional_params: dict,
        acompletion=None,
        litellm_params=None,
        logger_fn=None,
        headers={},
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[HTTPHandler] = None,
    ) -> ModelResponse:
        raise CustomLLMError(status_code=500, message="Not implemented yet!")

    def streaming(
        self,
        model: str,
        messages: list,
        api_base: str,
        custom_prompt_dict: dict,
        model_response: ModelResponse,
        print_verbose: Callable,
        encoding,
        api_key,
        logging_obj,
        optional_params: dict,
        acompletion=None,
        litellm_params=None,
        logger_fn=None,
        headers={},
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[HTTPHandler] = None,
    ) -> Iterator[GenericStreamingChunk]:
        raise CustomLLMError(status_code=500, message="Not implemented yet!")

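    # `streaming` (and `astreaming`) implementations are expected to yield
    # GenericStreamingChunk dicts (see litellm.types.utils.GenericStreamingChunk).
    # A single-chunk sketch with illustrative values:
    #
    #   chunk: GenericStreamingChunk = {
    #       "text": "Hello world",
    #       "is_finished": True,
    #       "finish_reason": "stop",
    #       "index": 0,
    #       "tool_use": None,
    #       "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
    #   }
    #   yield chunk
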
    async def acompletion(
        self,
        model: str,
        messages: list,
        api_base: str,
        custom_prompt_dict: dict,
        model_response: ModelResponse,
        print_verbose: Callable,
        encoding,
        api_key,
        logging_obj,
        optional_params: dict,
        acompletion=None,
        litellm_params=None,
        logger_fn=None,
        headers={},
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[AsyncHTTPHandler] = None,
    ) -> ModelResponse:
        raise CustomLLMError(status_code=500, message="Not implemented yet!")

    async def astreaming(
        self,
        model: str,
        messages: list,
        api_base: str,
        custom_prompt_dict: dict,
        model_response: ModelResponse,
        print_verbose: Callable,
        encoding,
        api_key,
        logging_obj,
        optional_params: dict,
        acompletion=None,
        litellm_params=None,
        logger_fn=None,
        headers={},
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[AsyncHTTPHandler] = None,
    ) -> AsyncIterator[GenericStreamingChunk]:
        raise CustomLLMError(status_code=500, message="Not implemented yet!")

    def image_generation(
        self,
        model: str,
        prompt: str,
        api_key: Optional[str],
        api_base: Optional[str],
        model_response: ImageResponse,
        optional_params: dict,
        logging_obj: Any,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[HTTPHandler] = None,
    ) -> ImageResponse:
        raise CustomLLMError(status_code=500, message="Not implemented yet!")

    async def aimage_generation(
        self,
        model: str,
        prompt: str,
        model_response: ImageResponse,
        api_key: Optional[
            str
        ],  # dynamically set api_key - https://docs.litellm.ai/docs/set_keys#api_key
        api_base: Optional[
            str
        ],  # dynamically set api_base - https://docs.litellm.ai/docs/set_keys#api_base
        optional_params: dict,
        logging_obj: Any,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[AsyncHTTPHandler] = None,
    ) -> ImageResponse:
        raise CustomLLMError(status_code=500, message="Not implemented yet!")


def custom_chat_llm_router(
    async_fn: bool, stream: Optional[bool], custom_llm: CustomLLM
):
    """
    Routes the call to the matching CustomLLM method
    (completion / acompletion / streaming / astreaming), based on call type.
    """
    if async_fn:
        if stream:
            return custom_llm.astreaming
        return custom_llm.acompletion
    if stream:
        return custom_llm.streaming
    return custom_llm.completion
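
# Example: a minimal custom provider (a hedged sketch, assuming the documented
# `litellm.custom_provider_map` registration hook; the provider name
# "my-custom-llm" and the canned reply are illustrative only):
#
#   import litellm
#   from litellm.llms.custom_llm import CustomLLM
#
#   class EchoLLM(CustomLLM):
#       def completion(self, *args, **kwargs) -> ModelResponse:
#           # mock_response short-circuits litellm.completion() with a canned
#           # ModelResponse - no network call is made.
#           return litellm.completion(
#               model="gpt-3.5-turbo",
#               messages=[{"role": "user", "content": "Hello!"}],
#               mock_response="Hi!",
#           )  # type: ignore
#
#   litellm.custom_provider_map = [
#       {"provider": "my-custom-llm", "custom_handler": EchoLLM()}
#   ]
#
#   resp = litellm.completion(
#       model="my-custom-llm/any-model",
#       messages=[{"role": "user", "content": "Hello!"}],
#   )
#
# Once registered, custom_chat_llm_router(async_fn=False, stream=None,
# custom_llm=EchoLLM()) returns the bound `completion` method, which litellm
# then invokes with the standard handler arguments.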