diff --git a/docs/my-website/docs/providers/custom_llm_server.md b/docs/my-website/docs/providers/custom_llm_server.md
new file mode 100644
index 000000000..f8d5fb551
--- /dev/null
+++ b/docs/my-website/docs/providers/custom_llm_server.md
@@ -0,0 +1,73 @@
+# Custom API Server (Custom Format)
+
+LiteLLM allows you to call your custom endpoint in the OpenAI ChatCompletion format.
+
+
+:::info
+
+For calling an OpenAI-compatible endpoint, [go here](./openai_compatible.md)
+:::
+
+## Quick Start
+
+```python
+import litellm
+from litellm import CustomLLM, completion, get_llm_provider
+
+
+class MyCustomLLM(CustomLLM):
+    def completion(self, *args, **kwargs) -> litellm.ModelResponse:
+        return litellm.completion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "Hello world"}],
+            mock_response="Hi!",
+        )  # type: ignore
+
+my_custom_llm = MyCustomLLM()  # 👈 instantiate your handler
+
+litellm.custom_provider_map = [  # 👈 KEY STEP - REGISTER HANDLER
+        {"provider": "my-custom-llm", "custom_handler": my_custom_llm}
+    ]
+
+resp = completion(
+        model="my-custom-llm/my-fake-model",
+        messages=[{"role": "user", "content": "Hello world!"}],
+    )
+
+assert resp.choices[0].message.content == "Hi!"
+```
+
+
+## Custom Handler Spec
+
+```python
+from litellm.types.utils import GenericStreamingChunk, ModelResponse
+from typing import Iterator, AsyncIterator
+from litellm.llms.base import BaseLLM
+
+class CustomLLMError(Exception):  # use this for all your exceptions
+    def __init__(
+        self,
+        status_code,
+        message,
+    ):
+        self.status_code = status_code
+        self.message = message
+        super().__init__(
+            self.message
+        )  # Call the base class constructor with the parameters it needs
+
+class CustomLLM(BaseLLM):
+    def __init__(self) -> None:
+        super().__init__()
+
+    def completion(self, *args, **kwargs) -> ModelResponse:
+        raise CustomLLMError(status_code=500, message="Not implemented yet!")
+
+    def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]:
+        raise CustomLLMError(status_code=500, message="Not implemented yet!")
+
+    async def acompletion(self, *args, **kwargs) -> ModelResponse:
+        raise CustomLLMError(status_code=500, message="Not implemented yet!")
+
+    async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]:
+        raise CustomLLMError(status_code=500, message="Not implemented yet!")
+```
\ No newline at end of file
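The spec above stubs out `streaming` but never shows what a streamed chunk looks like. Below is a minimal, hypothetical sketch of a handler that implements `streaming` and is called with `stream=True`. The chunk keys (`text`, `is_finished`, `finish_reason`, `index`, `tool_use`, `usage`) are assumptions about the `GenericStreamingChunk` type; verify them against `litellm.types.utils` before relying on them.

```python
# Hypothetical streaming-handler sketch - the chunk field names below are
# assumptions; check litellm.types.utils.GenericStreamingChunk for the real definition.
from typing import Iterator

import litellm
from litellm import CustomLLM, completion
from litellm.types.utils import GenericStreamingChunk


class MyStreamingLLM(CustomLLM):
    def completion(self, *args, **kwargs) -> litellm.ModelResponse:
        return litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hello world"}],
            mock_response="Hi!",
        )  # type: ignore

    def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]:
        # Yield chunks in the generic streaming format; a real handler would
        # translate its backend's stream into these dicts.
        chunk: GenericStreamingChunk = {
            "text": "Hi!",
            "is_finished": True,
            "finish_reason": "stop",
            "index": 0,
            "tool_use": None,
            "usage": {"prompt_tokens": 2, "completion_tokens": 1, "total_tokens": 3},
        }
        yield chunk


my_streaming_llm = MyStreamingLLM()
litellm.custom_provider_map = [
    {"provider": "my-custom-llm", "custom_handler": my_streaming_llm}
]

for part in completion(
    model="my-custom-llm/my-fake-model",
    messages=[{"role": "user", "content": "Hello world!"}],
    stream=True,  # routes the call to the handler's streaming() method
):
    print(part)
```

Registration is identical to the Quick Start; only the `stream=True` flag changes on the caller side.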
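The `:::info` box in the new page routes OpenAI-compatible servers to `openai_compatible.md` rather than the guide deleted below. A rough sketch of that route for comparison, with a placeholder URL and model name (neither comes from this diff):

```python
# Sketch of calling an OpenAI-compatible endpoint via litellm's "openai/" prefix route.
# The api_base and model name are placeholders for illustration only.
import os

from litellm import completion

os.environ["OPENAI_API_KEY"] = "anything"  # many self-hosted proxies ignore the key

response = completion(
    model="openai/my-model-name",                    # "openai/" prefix routes through the OpenAI client
    api_base="https://my-openai-proxy.example.com",  # base URL of your OpenAI-compatible server
    messages=[{"role": "user", "content": "Hello, how are you?"}],
)
print(response)
```

This `openai/<model>` prefix plus `api_base` pattern covers the use case the removed page handled with `custom_llm_provider="openai"`.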
diff --git a/docs/my-website/docs/providers/custom_openai_proxy.md b/docs/my-website/docs/providers/custom_openai_proxy.md
deleted file mode 100644
index b6f2eccac..000000000
--- a/docs/my-website/docs/providers/custom_openai_proxy.md
+++ /dev/null
@@ -1,129 +0,0 @@
-# Custom API Server (OpenAI Format)
-
-LiteLLM allows you to call your custom endpoint in the OpenAI ChatCompletion format
-
-## API KEYS
-No api keys required
-
-## Set up your Custom API Server
-Your server should have the following Endpoints:
-
-Here's an example OpenAI proxy server with routes: https://replit.com/@BerriAI/openai-proxy#main.py
-
-### Required Endpoints
-- POST `/chat/completions` - chat completions endpoint
-
-### Optional Endpoints
-- POST `/completions` - completions endpoint
-- Get `/models` - available models on server
-- POST `/embeddings` - creates an embedding vector representing the input text.
-
-
-## Example Usage
-
-### Call `/chat/completions`
-In order to use your custom OpenAI Chat Completion proxy with LiteLLM, ensure you set
-
-* `api_base` to your proxy url, example "https://openai-proxy.berriai.repl.co"
-* `custom_llm_provider` to `openai` this ensures litellm uses the `openai.ChatCompletion` to your api_base
-
-```python
-import os
-from litellm import completion
-
-## set ENV variables
-os.environ["OPENAI_API_KEY"] = "anything" #key is not used for proxy
-
-messages = [{ "content": "Hello, how are you?","role": "user"}]
-
-response = completion(
-    model="command-nightly",
-    messages=[{ "content": "Hello, how are you?","role": "user"}],
-    api_base="https://openai-proxy.berriai.repl.co",
-    custom_llm_provider="openai" # litellm will use the openai.ChatCompletion to make the request
-
-)
-print(response)
-```
-
-#### Response
-```json
-{
-  "object": "chat.completion",
-  "choices": [{
-    "finish_reason": "stop",
-    "index": 0,
-    "message": {
-      "content": "The sky, a canvas of blue,\nA work of art, pure and true,\nA",
-      "role": "assistant"
-    }
-  }],
-  "id": "chatcmpl-7fbd6077-de10-4cb4-a8a4-3ef11a98b7c8",
-  "created": 1699290237.408061,
-  "model": "togethercomputer/llama-2-70b-chat",
-  "usage": {
-    "completion_tokens": 18,
-    "prompt_tokens": 14,
-    "total_tokens": 32
-  }
-}
-```
-
-
-### Call `/completions`
-In order to use your custom OpenAI Completion proxy with LiteLLM, ensure you set
-
-* `api_base` to your proxy url, example "https://openai-proxy.berriai.repl.co"
-* `custom_llm_provider` to `text-completion-openai` this ensures litellm uses the `openai.Completion` to your api_base
-
-```python
-import os
-from litellm import completion
-
-## set ENV variables
-os.environ["OPENAI_API_KEY"] = "anything" #key is not used for proxy
-
-messages = [{ "content": "Hello, how are you?","role": "user"}]
-
-response = completion(
-    model="command-nightly",
-    messages=[{ "content": "Hello, how are you?","role": "user"}],
-    api_base="https://openai-proxy.berriai.repl.co",
-    custom_llm_provider="text-completion-openai" # litellm will use the openai.Completion to make the request
-
-)
-print(response)
-```
-
-#### Response
-```json
-{
-  "warning": "This model version is deprecated. Migrate before January 4, 2024 to avoid disruption of service. Learn more https://platform.openai.com/docs/deprecations",
-  "id": "cmpl-8HxHqF5dymQdALmLplS0dWKZVFe3r",
-  "object": "text_completion",
-  "created": 1699290166,
-  "model": "text-davinci-003",
-  "choices": [{
-    "text": "\n\nThe weather in San Francisco varies depending on what time of year and time",
-    "index": 0,
-    "logprobs": None,
-    "finish_reason": "length"
-  }],
-  "usage": {
-    "prompt_tokens": 7,
-    "completion_tokens": 16,
-    "total_tokens": 23
-  }
-}
-```
\ No newline at end of file
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index d228e09d2..c1ce83068 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -175,7 +175,8 @@ const sidebars = {
         "providers/aleph_alpha",
         "providers/baseten",
         "providers/openrouter",
-        "providers/custom_openai_proxy",
+        // "providers/custom_openai_proxy",
+        "providers/custom_llm_server",
         "providers/petals",
       ],