docs(custom_llm_server.md): add calling custom llm server to docs

Krrish Dholakia 2024-07-25 17:41:19 -07:00
parent 060249c7e0
commit a2d07cfe64
3 changed files with 75 additions and 130 deletions

custom_llm_server.md

@@ -0,0 +1,73 @@
# Custom API Server (Custom Format)
LiteLLM allows you to call your custom endpoint in the OpenAI ChatCompletion format.
:::info
To call an OpenAI-compatible endpoint, [go here](./openai_compatible.md).
:::
## Quick Start
```python
import litellm
from litellm import CustomLLM, completion, get_llm_provider


class MyCustomLLM(CustomLLM):
    def completion(self, *args, **kwargs) -> litellm.ModelResponse:
        return litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hello world"}],
            mock_response="Hi!",
        )  # type: ignore


my_custom_llm = MyCustomLLM()

litellm.custom_provider_map = [  # 👈 KEY STEP - REGISTER HANDLER
    {"provider": "my-custom-llm", "custom_handler": my_custom_llm}
]

resp = completion(
    model="my-custom-llm/my-fake-model",
    messages=[{"role": "user", "content": "Hello world!"}],
)

assert resp.choices[0].message.content == "Hi!"
```
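The handler above returns a mocked response. In practice, `completion()` would call your own backend and wrap its output in a `ModelResponse`. Here is a minimal sketch of that pattern, assuming a hypothetical `https://my-custom-backend.example/generate` endpoint that returns `{"text": "..."}` (the endpoint, its response shape, and the argument handling are illustrative assumptions, not part of LiteLLM):
```python
import requests

import litellm
from litellm import CustomLLM


class MyBackendLLM(CustomLLM):
    def completion(self, *args, **kwargs) -> litellm.ModelResponse:
        # NOTE: how litellm passes arguments to custom handlers can vary by version;
        # this sketch assumes the chat messages arrive as the "messages" keyword argument.
        messages = kwargs.get("messages") or [{"role": "user", "content": ""}]

        # Call the hypothetical backend with the latest message content.
        r = requests.post(
            "https://my-custom-backend.example/generate",
            json={"prompt": messages[-1]["content"]},
            timeout=30,
        )
        r.raise_for_status()
        text = r.json()["text"]

        # Reuse mock_response to build a well-formed ModelResponse around the backend's text.
        return litellm.completion(
            model="gpt-3.5-turbo",
            messages=messages,
            mock_response=text,
        )  # type: ignore
```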
## Custom Handler Spec
```python
from litellm.types.utils import GenericStreamingChunk, ModelResponse
from typing import Iterator, AsyncIterator
from litellm.llms.base import BaseLLM


class CustomLLMError(Exception):  # use this for all your exceptions
    def __init__(
        self,
        status_code,
        message,
    ):
        self.status_code = status_code
        self.message = message
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs


class CustomLLM(BaseLLM):
    def __init__(self) -> None:
        super().__init__()

    def completion(self, *args, **kwargs) -> ModelResponse:
        raise CustomLLMError(status_code=500, message="Not implemented yet!")

    def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]:
        raise CustomLLMError(status_code=500, message="Not implemented yet!")

    async def acompletion(self, *args, **kwargs) -> ModelResponse:
        raise CustomLLMError(status_code=500, message="Not implemented yet!")

    async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]:
        raise CustomLLMError(status_code=500, message="Not implemented yet!")
```
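To support streaming, a handler overrides `streaming()` (or `astreaming()`) and yields `GenericStreamingChunk` dicts. Below is a minimal sketch, assuming your litellm version routes `stream=True` calls for the registered provider to the handler's `streaming()` method and that `GenericStreamingChunk` accepts the keys shown (check `litellm.types.utils` for your version):
```python
from typing import Iterator

import litellm
from litellm import CustomLLM
from litellm.types.utils import GenericStreamingChunk


class MyStreamingLLM(CustomLLM):
    def completion(self, *args, **kwargs) -> litellm.ModelResponse:
        return litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hello world"}],
            mock_response="Hi!",
        )  # type: ignore

    def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]:
        # Yield a single chunk carrying the whole response, then mark it finished.
        # The exact chunk fields are an assumption - adjust to your litellm version.
        yield {
            "text": "Hi!",
            "is_finished": True,
            "finish_reason": "stop",
            "index": 0,
            "tool_use": None,
            "usage": {"prompt_tokens": 1, "completion_tokens": 1, "total_tokens": 2},
        }


litellm.custom_provider_map = [
    {"provider": "my-streaming-llm", "custom_handler": MyStreamingLLM()}
]

for chunk in litellm.completion(
    model="my-streaming-llm/my-fake-model",
    messages=[{"role": "user", "content": "Hello world!"}],
    stream=True,
):
    print(chunk)
```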

custom_openai_proxy.md

@@ -1,129 +0,0 @@
# Custom API Server (OpenAI Format)
LiteLLM allows you to call your custom endpoint in the OpenAI ChatCompletion format
## API Keys
No API keys are required.
## Set up your Custom API Server
Your server should expose the following endpoints (a minimal example server sketch follows the list below).
Here's an example OpenAI proxy server with these routes: https://replit.com/@BerriAI/openai-proxy#main.py
### Required Endpoints
- POST `/chat/completions` - chat completions endpoint
### Optional Endpoints
- POST `/completions` - completions endpoint
- GET `/models` - available models on server
- POST `/embeddings` - creates an embedding vector representing the input text
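For illustration, here is a minimal sketch of such a server built with FastAPI (this sketch is an assumption, not the Replit example above; the route and response fields follow the OpenAI ChatCompletion format):
```python
# Minimal sketch of an OpenAI-format proxy server (illustrative only).
import time
import uuid

from fastapi import FastAPI, Request

app = FastAPI()


@app.post("/chat/completions")
async def chat_completions(request: Request):
    body = await request.json()
    # A real proxy would forward `body` to an upstream model here.
    return {
        "id": f"chatcmpl-{uuid.uuid4()}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": body.get("model", "my-model"),
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": "Hello from the proxy!"},
                "finish_reason": "stop",
            }
        ],
        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
    }
```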
## Example Usage
### Call `/chat/completions`
In order to use your custom OpenAI Chat Completion proxy with LiteLLM, ensure you set:
* `api_base` to your proxy URL, for example "https://openai-proxy.berriai.repl.co"
* `custom_llm_provider` to `openai` - this ensures litellm uses `openai.ChatCompletion` to call your api_base
```python
import os
from litellm import completion

## set ENV variables
os.environ["OPENAI_API_KEY"] = "anything"  # key is not used for proxy

messages = [{"content": "Hello, how are you?", "role": "user"}]

response = completion(
    model="command-nightly",
    messages=messages,
    api_base="https://openai-proxy.berriai.repl.co",
    custom_llm_provider="openai",  # litellm will use openai.ChatCompletion to make the request
)
print(response)
```
#### Response
```json
{
  "object": "chat.completion",
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "The sky, a canvas of blue,\nA work of art, pure and true,\nA",
        "role": "assistant"
      }
    }
  ],
  "id": "chatcmpl-7fbd6077-de10-4cb4-a8a4-3ef11a98b7c8",
  "created": 1699290237.408061,
  "model": "togethercomputer/llama-2-70b-chat",
  "usage": {
    "completion_tokens": 18,
    "prompt_tokens": 14,
    "total_tokens": 32
  }
}
```
### Call `/completions`
In order to use your custom OpenAI Completion proxy with LiteLLM, ensure you set:
* `api_base` to your proxy URL, for example "https://openai-proxy.berriai.repl.co"
* `custom_llm_provider` to `text-completion-openai` - this ensures litellm uses `openai.Completion` to call your api_base
```python
import os
from litellm import completion

## set ENV variables
os.environ["OPENAI_API_KEY"] = "anything"  # key is not used for proxy

messages = [{"content": "Hello, how are you?", "role": "user"}]

response = completion(
    model="command-nightly",
    messages=messages,
    api_base="https://openai-proxy.berriai.repl.co",
    custom_llm_provider="text-completion-openai",  # litellm will use openai.Completion to make the request
)
print(response)
```
#### Response
```json
{
  "warning": "This model version is deprecated. Migrate before January 4, 2024 to avoid disruption of service. Learn more https://platform.openai.com/docs/deprecations",
  "id": "cmpl-8HxHqF5dymQdALmLplS0dWKZVFe3r",
  "object": "text_completion",
  "created": 1699290166,
  "model": "text-davinci-003",
  "choices": [
    {
      "text": "\n\nThe weather in San Francisco varies depending on what time of year and time",
      "index": 0,
      "logprobs": null,
      "finish_reason": "length"
    }
  ],
  "usage": {
    "prompt_tokens": 7,
    "completion_tokens": 16,
    "total_tokens": 23
  }
}
```

sidebars.js

@@ -175,7 +175,8 @@ const sidebars = {
       "providers/aleph_alpha",
       "providers/baseten",
       "providers/openrouter",
-      "providers/custom_openai_proxy",
+      // "providers/custom_openai_proxy",
+      "providers/custom_llm_server",
       "providers/petals",
     ],