forked from phoenix/litellm-mirror
docs(custom_llm_server.md): add calling custom llm server to docs
parent 060249c7e0
commit a2d07cfe64
3 changed files with 75 additions and 130 deletions
docs/my-website/docs/providers/custom_llm_server.md (new file, 73 lines)

@@ -0,0 +1,73 @@
# Custom API Server (Custom Format)

LiteLLM allows you to call your custom endpoint in the OpenAI ChatCompletion format.

:::info

To call an OpenAI-compatible endpoint, [go here](./openai_compatible.md).

:::

## Quick Start

```python
import litellm
from litellm import CustomLLM, completion, get_llm_provider


class MyCustomLLM(CustomLLM):
    def completion(self, *args, **kwargs) -> litellm.ModelResponse:
        return litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hello world"}],
            mock_response="Hi!",
        )  # type: ignore


my_custom_llm = MyCustomLLM()

litellm.custom_provider_map = [  # 👈 KEY STEP - REGISTER HANDLER
    {"provider": "my-custom-llm", "custom_handler": my_custom_llm}
]

resp = completion(
    model="my-custom-llm/my-fake-model",
    messages=[{"role": "user", "content": "Hello world!"}],
)

assert resp.choices[0].message.content == "Hi!"
```
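
The handler above only overrides `completion`. If you also override `acompletion`, the same registered provider can be called with `litellm.acompletion`. A minimal sketch, assuming the async path dispatches to your handler's `acompletion` (the mock responses and provider name are illustrative):

```python
import asyncio

import litellm
from litellm import CustomLLM, acompletion


class MyCustomLLM(CustomLLM):
    def completion(self, *args, **kwargs) -> litellm.ModelResponse:
        return litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hello world"}],
            mock_response="Hi!",
        )  # type: ignore

    async def acompletion(self, *args, **kwargs) -> litellm.ModelResponse:
        # async variant of the handler above; also returns a mocked response
        return litellm.completion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "Hello world"}],
            mock_response="Hi!",
        )  # type: ignore


litellm.custom_provider_map = [
    {"provider": "my-custom-llm", "custom_handler": MyCustomLLM()}
]


async def main():
    resp = await acompletion(
        model="my-custom-llm/my-fake-model",
        messages=[{"role": "user", "content": "Hello world!"}],
    )
    print(resp.choices[0].message.content)


asyncio.run(main())
```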

## Custom Handler Spec

```python
from litellm.types.utils import GenericStreamingChunk, ModelResponse
from typing import Iterator, AsyncIterator
from litellm.llms.base import BaseLLM


class CustomLLMError(Exception):  # use this for all your exceptions
    def __init__(
        self,
        status_code,
        message,
    ):
        self.status_code = status_code
        self.message = message
        super().__init__(
            self.message
        )  # Call the base class constructor with the parameters it needs


class CustomLLM(BaseLLM):
    def __init__(self) -> None:
        super().__init__()

    def completion(self, *args, **kwargs) -> ModelResponse:
        raise CustomLLMError(status_code=500, message="Not implemented yet!")

    def streaming(self, *args, **kwargs) -> Iterator[GenericStreamingChunk]:
        raise CustomLLMError(status_code=500, message="Not implemented yet!")

    async def acompletion(self, *args, **kwargs) -> ModelResponse:
        raise CustomLLMError(status_code=500, message="Not implemented yet!")

    async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]:
        raise CustomLLMError(status_code=500, message="Not implemented yet!")
```
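
The spec above only defines the interface; how each method reaches your backend is up to you. Below is a hedged sketch of a `completion` implementation that forwards to a hypothetical HTTP endpoint (the `http://localhost:8080/v1/generate` URL and its `{"text": ...}` response shape are assumptions, not part of LiteLLM), reusing the `mock_response` trick from the Quick Start to wrap the returned text in a `ModelResponse`:

```python
import httpx  # assumption: httpx is available in your environment
import litellm
from litellm import CustomLLM


class MyEndpointLLM(CustomLLM):
    def completion(self, *args, **kwargs) -> litellm.ModelResponse:
        messages = kwargs.get("messages") or []
        # Forward the chat messages to a hypothetical custom server
        resp = httpx.post(
            "http://localhost:8080/v1/generate",  # hypothetical endpoint
            json={"messages": messages},
            timeout=30.0,
        )
        resp.raise_for_status()
        text = resp.json()["text"]  # assumed response shape: {"text": "..."}
        # Wrap the raw text in an OpenAI-style ModelResponse via mock_response,
        # the same trick used in the Quick Start above
        return litellm.completion(
            model="gpt-3.5-turbo",
            messages=messages,
            mock_response=text,
        )  # type: ignore


litellm.custom_provider_map = [
    {"provider": "my-endpoint-llm", "custom_handler": MyEndpointLLM()}
]
```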

docs/my-website/docs/providers/custom_openai_proxy.md (deleted file, 129 lines)

@@ -1,129 +0,0 @@
# Custom API Server (OpenAI Format)

LiteLLM allows you to call your custom endpoint in the OpenAI ChatCompletion format.

## API Keys

No API keys are required.

## Set up your Custom API Server

Your server should expose the following endpoints; a minimal server sketch is shown after the list below.

Here's an example OpenAI proxy server with routes: https://replit.com/@BerriAI/openai-proxy#main.py

### Required Endpoints
- POST `/chat/completions` - chat completions endpoint

### Optional Endpoints
- POST `/completions` - completions endpoint
- GET `/models` - available models on the server
- POST `/embeddings` - creates an embedding vector representing the input text
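
A minimal sketch of such a server, assuming FastAPI is installed (run with `uvicorn`); only the required `/chat/completions` route is shown, and the reply is a hard-coded OpenAI-style payload for illustration:

```python
# server.py - run with: uvicorn server:app --port 8080
import time
import uuid

from fastapi import FastAPI, Request

app = FastAPI()


@app.post("/chat/completions")
async def chat_completions(request: Request):
    body = await request.json()
    # Return a hard-coded OpenAI-style chat completion for illustration
    return {
        "id": f"chatcmpl-{uuid.uuid4()}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": body.get("model", "my-model"),
        "choices": [
            {
                "index": 0,
                "finish_reason": "stop",
                "message": {"role": "assistant", "content": "Hello from the proxy!"},
            }
        ],
        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
    }
```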

## Example Usage

### Call `/chat/completions`

To use your custom OpenAI Chat Completion proxy with LiteLLM, set:

* `api_base` to your proxy URL, e.g. "https://openai-proxy.berriai.repl.co"
* `custom_llm_provider` to `openai` - this ensures LiteLLM uses `openai.ChatCompletion` against your `api_base`

```python
import os
from litellm import completion

## set ENV variables
os.environ["OPENAI_API_KEY"] = "anything"  # key is not used for proxy

messages = [{"content": "Hello, how are you?", "role": "user"}]

response = completion(
    model="command-nightly",
    messages=messages,
    api_base="https://openai-proxy.berriai.repl.co",
    custom_llm_provider="openai",  # litellm will use openai.ChatCompletion to make the request
)
print(response)
```

#### Response
```json
{
  "object": "chat.completion",
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "message": {
        "content": "The sky, a canvas of blue,\nA work of art, pure and true,\nA",
        "role": "assistant"
      }
    }
  ],
  "id": "chatcmpl-7fbd6077-de10-4cb4-a8a4-3ef11a98b7c8",
  "created": 1699290237.408061,
  "model": "togethercomputer/llama-2-70b-chat",
  "usage": {
    "completion_tokens": 18,
    "prompt_tokens": 14,
    "total_tokens": 32
  }
}
```

### Call `/completions`

To use your custom OpenAI Completion proxy with LiteLLM, set:

* `api_base` to your proxy URL, e.g. "https://openai-proxy.berriai.repl.co"
* `custom_llm_provider` to `text-completion-openai` - this ensures LiteLLM uses `openai.Completion` against your `api_base`

```python
import os
from litellm import completion

## set ENV variables
os.environ["OPENAI_API_KEY"] = "anything"  # key is not used for proxy

messages = [{"content": "Hello, how are you?", "role": "user"}]

response = completion(
    model="command-nightly",
    messages=messages,
    api_base="https://openai-proxy.berriai.repl.co",
    custom_llm_provider="text-completion-openai",  # litellm will use openai.Completion to make the request
)
print(response)
```

#### Response
```json
{
  "warning": "This model version is deprecated. Migrate before January 4, 2024 to avoid disruption of service. Learn more https://platform.openai.com/docs/deprecations",
  "id": "cmpl-8HxHqF5dymQdALmLplS0dWKZVFe3r",
  "object": "text_completion",
  "created": 1699290166,
  "model": "text-davinci-003",
  "choices": [
    {
      "text": "\n\nThe weather in San Francisco varies depending on what time of year and time",
      "index": 0,
      "logprobs": null,
      "finish_reason": "length"
    }
  ],
  "usage": {
    "prompt_tokens": 7,
    "completion_tokens": 16,
    "total_tokens": 23
  }
}
```

docs/my-website/sidebars.js

@@ -175,7 +175,8 @@ const sidebars = {
        "providers/aleph_alpha",
        "providers/baseten",
        "providers/openrouter",
-       "providers/custom_openai_proxy",
+       // "providers/custom_openai_proxy",
+       "providers/custom_llm_server",
        "providers/petals",
      ],