diff --git a/litellm/__init__.py b/litellm/__init__.py index 956834afc3..0527ef199f 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -813,6 +813,7 @@ from .utils import ( ) from .types.utils import ImageObject +from .llms.custom_llm import CustomLLM from .llms.huggingface_restapi import HuggingfaceConfig from .llms.anthropic import AnthropicConfig from .llms.databricks import DatabricksConfig, DatabricksEmbeddingConfig @@ -909,3 +910,11 @@ from .cost_calculator import response_cost_calculator, cost_per_token from .types.adapter import AdapterItem adapters: List[AdapterItem] = [] + +### CUSTOM LLMs ### +from .types.llms.custom_llm import CustomLLMItem + +custom_provider_map: List[CustomLLMItem] = [] +_custom_providers: List[str] = ( + [] +) # internal helper util, used to track names of custom providers diff --git a/litellm/llms/custom_llm.py b/litellm/llms/custom_llm.py new file mode 100644 index 0000000000..fac1eb2936 --- /dev/null +++ b/litellm/llms/custom_llm.py @@ -0,0 +1,70 @@ +# What is this? +## Handler file for a Custom Chat LLM + +""" +- completion +- acompletion +- streaming +- async_streaming +""" + +import copy +import json +import os +import time +import types +from enum import Enum +from functools import partial +from typing import Callable, List, Literal, Optional, Tuple, Union + +import httpx # type: ignore +import requests # type: ignore + +import litellm +from litellm.litellm_core_utils.core_helpers import map_finish_reason +from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler, HTTPHandler +from litellm.types.llms.databricks import GenericStreamingChunk +from litellm.types.utils import ProviderField +from litellm.utils import CustomStreamWrapper, EmbeddingResponse, ModelResponse, Usage + +from .base import BaseLLM +from .prompt_templates.factory import custom_prompt, prompt_factory + + +class CustomLLMError(Exception): # use this for all your exceptions + def __init__( + self, + status_code, + message, + ): + self.status_code = status_code + self.message = message + super().__init__( + self.message + ) # Call the base class constructor with the parameters it needs + + +def custom_chat_llm_router(): + """ + Routes call to CustomLLM completion/acompletion/streaming/astreaming functions, based on call type + + Validates if response is in expected format + """ + pass + + +class CustomLLM(BaseLLM): + def __init__(self) -> None: + super().__init__() + + def completion(self, *args, **kwargs) -> ModelResponse: + raise CustomLLMError(status_code=500, message="Not implemented yet!") + + def streaming(self, *args, **kwargs): + raise CustomLLMError(status_code=500, message="Not implemented yet!") + + async def acompletion(self, *args, **kwargs) -> ModelResponse: + raise CustomLLMError(status_code=500, message="Not implemented yet!") + + async def astreaming(self, *args, **kwargs): + raise CustomLLMError(status_code=500, message="Not implemented yet!") diff --git a/litellm/main.py b/litellm/main.py index f724a68bd3..539c3d3e1c 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -107,6 +107,7 @@ from .llms.anthropic_text import AnthropicTextCompletion from .llms.azure import AzureChatCompletion from .llms.azure_text import AzureTextCompletion from .llms.bedrock_httpx import BedrockConverseLLM, BedrockLLM +from .llms.custom_llm import CustomLLM, custom_chat_llm_router from .llms.databricks import DatabricksChatCompletion from .llms.huggingface_restapi import Huggingface from .llms.openai import OpenAIChatCompletion, OpenAITextCompletion @@ -2690,6 +2691,20 @@ def completion( model_response.created = int(time.time()) model_response.model = model response = model_response + elif ( + custom_llm_provider in litellm._custom_providers + ): # Assume custom LLM provider + # Get the Custom Handler + custom_handler: Optional[CustomLLM] = None + for item in litellm.custom_provider_map: + if item["provider"] == custom_llm_provider: + custom_handler = item["custom_handler"] + + if custom_handler is None: + raise ValueError( + f"Unable to map your input to a model. Check your input - {args}" + ) + response = custom_handler.completion() else: raise ValueError( f"Unable to map your input to a model. Check your input - {args}" diff --git a/litellm/tests/test_custom_llm.py b/litellm/tests/test_custom_llm.py new file mode 100644 index 0000000000..0506986eb9 --- /dev/null +++ b/litellm/tests/test_custom_llm.py @@ -0,0 +1,63 @@ +# What is this? +## Unit tests for the CustomLLM class + + +import asyncio +import os +import sys +import time +import traceback + +import openai +import pytest + +sys.path.insert( + 0, os.path.abspath("../..") +) # Adds the parent directory to the system path +import os +from collections import defaultdict +from concurrent.futures import ThreadPoolExecutor +from unittest.mock import AsyncMock, MagicMock, patch + +import httpx +from dotenv import load_dotenv + +import litellm +from litellm import CustomLLM, completion, get_llm_provider + + +class MyCustomLLM(CustomLLM): + def completion(self, *args, **kwargs) -> litellm.ModelResponse: + return litellm.completion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hello world"}], + mock_response="Hi!", + ) # type: ignore + + +def test_get_llm_provider(): + from litellm.utils import custom_llm_setup + + my_custom_llm = MyCustomLLM() + litellm.custom_provider_map = [ + {"provider": "custom_llm", "custom_handler": my_custom_llm} + ] + + custom_llm_setup() + + model, provider, _, _ = get_llm_provider(model="custom_llm/my-fake-model") + + assert provider == "custom_llm" + + +def test_simple_completion(): + my_custom_llm = MyCustomLLM() + litellm.custom_provider_map = [ + {"provider": "custom_llm", "custom_handler": my_custom_llm} + ] + resp = completion( + model="custom_llm/my-fake-model", + messages=[{"role": "user", "content": "Hello world!"}], + ) + + assert resp.choices[0].message.content == "Hi!" diff --git a/litellm/types/llms/custom_llm.py b/litellm/types/llms/custom_llm.py new file mode 100644 index 0000000000..d5499a4194 --- /dev/null +++ b/litellm/types/llms/custom_llm.py @@ -0,0 +1,10 @@ +from typing import List + +from typing_extensions import Dict, Required, TypedDict, override + +from litellm.llms.custom_llm import CustomLLM + + +class CustomLLMItem(TypedDict): + provider: str + custom_handler: CustomLLM diff --git a/litellm/utils.py b/litellm/utils.py index e104de958a..0f1b0315db 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -330,6 +330,18 @@ class Rules: ####### CLIENT ################### # make it easy to log if completion/embedding runs succeeded or failed + see what happened | Non-Blocking +def custom_llm_setup(): + """ + Add custom_llm provider to provider list + """ + for custom_llm in litellm.custom_provider_map: + if custom_llm["provider"] not in litellm.provider_list: + litellm.provider_list.append(custom_llm["provider"]) + + if custom_llm["provider"] not in litellm._custom_providers: + litellm._custom_providers.append(custom_llm["provider"]) + + def function_setup( original_function: str, rules_obj, start_time, *args, **kwargs ): # just run once to check if user wants to send their data anywhere - PostHog/Sentry/Slack/etc. @@ -341,6 +353,10 @@ def function_setup( try: global callback_list, add_breadcrumb, user_logger_fn, Logging + ## CUSTOM LLM SETUP ## + custom_llm_setup() + + ## LOGGING SETUP function_id = kwargs["id"] if "id" in kwargs else None if len(litellm.callbacks) > 0: