LiteLLM Minor Fixes & Improvements (10/07/2024) (#6101)
* fix(utils.py): support dropping temperature param for azure o1 models
* fix(main.py): handle azure o1 streaming requests. o1 doesn't support streaming, fake it to ensure code works as expected
* feat(utils.py): expose `hosted_vllm/` endpoint, with tool handling for vllm. Fixes https://github.com/BerriAI/litellm/issues/6088
* refactor(internal_user_endpoints.py): cleanup unused params + update docstring. Closes https://github.com/BerriAI/litellm/issues/6100
* fix(main.py): expose custom image generation api support. Fixes https://github.com/BerriAI/litellm/issues/6097
* fix: fix linting errors
* docs(custom_llm_server.md): add docs on custom api for image gen calls
* fix(types/utils.py): handle dict type
* fix(types/utils.py): fix linting errors
Parent: 5de69cb1b2
Commit: 6729c9ca7f
17 changed files with 643 additions and 76 deletions
@@ -183,11 +183,80 @@ class UnixTimeLLM(CustomLLM):

unixtime = UnixTimeLLM()
```

## Image Generation

1. Set up your `custom_handler.py` file

```python
import time
from typing import Any, Optional, Union

import httpx
import litellm
from litellm import CustomLLM
from litellm.llms.custom_httpx.http_handler import AsyncHTTPHandler  # adjust if this path differs in your litellm version
from litellm.types.utils import ImageResponse, ImageObject


class MyCustomLLM(CustomLLM):
    async def aimage_generation(
        self, model: str, prompt: str, model_response: ImageResponse,
        optional_params: dict, logging_obj: Any,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[AsyncHTTPHandler] = None,
    ) -> ImageResponse:
        # Return a static image URL; replace this with a call to your image backend.
        return ImageResponse(
            created=int(time.time()),
            data=[ImageObject(url="https://example.com/image.png")],
        )


my_custom_llm = MyCustomLLM()
```
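The handler above only implements the async path. If you also want blocking `litellm.image_generation()` calls to route to your handler, here is a sketch of the matching synchronous method, added to the same `MyCustomLLM` class; the signature is taken from the Custom Handler Spec further down, and the `HTTPHandler` import path is an assumption for your litellm version.

```python
# Sketch only: add this method to the MyCustomLLM class in custom_handler.py above.
from litellm.llms.custom_httpx.http_handler import HTTPHandler  # assumed import path


class MyCustomLLM(CustomLLM):
    # ... aimage_generation from above ...

    def image_generation(
        self, model: str, prompt: str, model_response: ImageResponse,
        optional_params: dict, logging_obj: Any,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[HTTPHandler] = None,
    ) -> ImageResponse:
        # Same canned response as the async method; replace with a real backend call.
        return ImageResponse(
            created=int(time.time()),
            data=[ImageObject(url="https://example.com/image.png")],
        )
```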

2. Add to `config.yaml`

In the config below, we pass:

* python_filename: `custom_handler.py`
* custom_handler_instance_name: `my_custom_llm` (defined in Step 1)

Together these give custom_handler: `custom_handler.my_custom_llm`

```yaml
model_list:
  - model_name: "test-model"
    litellm_params:
      model: "openai/text-embedding-ada-002"
  - model_name: "my-custom-model"
    litellm_params:
      model: "my-custom-llm/my-model"

litellm_settings:
  custom_provider_map:
    - {"provider": "my-custom-llm", "custom_handler": custom_handler.my_custom_llm}
```

```bash
litellm --config /path/to/config.yaml
```

3. Test it!

```bash
curl -X POST 'http://0.0.0.0:4000/v1/images/generations' \
-H 'Content-Type: application/json' \
-H 'Authorization: Bearer sk-1234' \
-d '{
    "model": "my-custom-model",
    "prompt": "A cute baby sea otter"
}'
```

Expected Response

```json
{
    "created": 1721955063,
    "data": [{"url": "https://example.com/image.png"}]
}
```
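Beyond curl, you can exercise the same route from Python. Here is a minimal sketch using the OpenAI SDK against the proxy, assuming it is running locally on port 4000 and accepts the `sk-1234` key from the curl example above.

```python
# Hypothetical client-side test against the LiteLLM proxy started in step 2.
# Assumes the proxy listens on http://0.0.0.0:4000 and accepts the key "sk-1234".
from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

image = client.images.generate(
    model="my-custom-model",  # routed to my-custom-llm/my-model via config.yaml
    prompt="A cute baby sea otter",
)
print(image.data[0].url)  # -> https://example.com/image.png from the custom handler
```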

## Custom Handler Spec

```python
-from litellm.types.utils import GenericStreamingChunk, ModelResponse
-from typing import Iterator, AsyncIterator
+from litellm.types.utils import GenericStreamingChunk, ModelResponse, ImageResponse
+from typing import Iterator, AsyncIterator, Any, Optional, Union
from litellm.llms.base import BaseLLM


class CustomLLMError(Exception):  # use this for all your exceptions
@@ -217,4 +286,28 @@ class CustomLLM(BaseLLM):

    async def astreaming(self, *args, **kwargs) -> AsyncIterator[GenericStreamingChunk]:
        raise CustomLLMError(status_code=500, message="Not implemented yet!")

    def image_generation(
        self,
        model: str,
        prompt: str,
        model_response: ImageResponse,
        optional_params: dict,
        logging_obj: Any,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[HTTPHandler] = None,
    ) -> ImageResponse:
        raise CustomLLMError(status_code=500, message="Not implemented yet!")

    async def aimage_generation(
        self,
        model: str,
        prompt: str,
        model_response: ImageResponse,
        optional_params: dict,
        logging_obj: Any,
        timeout: Optional[Union[float, httpx.Timeout]] = None,
        client: Optional[AsyncHTTPHandler] = None,
    ) -> ImageResponse:
        raise CustomLLMError(status_code=500, message="Not implemented yet!")
```
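For reference, the same handler can also be used directly from the Python SDK without the proxy. A sketch, assuming `custom_handler.py` from Step 1 is importable and that `litellm.custom_provider_map` accepts the same mapping as the `config.yaml` above:

```python
# Sketch: SDK-side registration of the custom handler, mirroring the
# custom_provider_map entry from config.yaml above.
import asyncio
import litellm
from custom_handler import my_custom_llm  # the instance created in Step 1

litellm.custom_provider_map = [
    {"provider": "my-custom-llm", "custom_handler": my_custom_llm}
]


async def main():
    resp = await litellm.aimage_generation(
        model="my-custom-llm/my-model",
        prompt="A cute baby sea otter",
    )
    print(resp.data[0].url)


asyncio.run(main())
```

This goes through the async `aimage_generation` method from Step 1; a blocking `litellm.image_generation()` call is expected to use the synchronous `image_generation` method instead (see the sketch under Step 1).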

@@ -12,14 +12,14 @@ vLLM Provides an OpenAI compatible endpoints - here's how to call it with LiteLLM

In order to use litellm to call a hosted vllm server, add the following to your completion call:

-* `model="openai/<your-vllm-model-name>"`
+* `model="hosted_vllm/<your-vllm-model-name>"`
* `api_base = "your-hosted-vllm-server"`

```python
import litellm

response = litellm.completion(
-    model="openai/facebook/opt-125m",       # pass the vllm model name
+    model="hosted_vllm/facebook/opt-125m",  # pass the vllm model name
    messages=messages,
    api_base="https://hosted-vllm-api.co",
    temperature=0.2,
@@ -39,7 +39,7 @@ Here's how to call an OpenAI-Compatible Endpoint with the LiteLLM Proxy Server

model_list:
  - model_name: my-model
    litellm_params:
-      model: openai/facebook/opt-125m       # add openai/ prefix to route as OpenAI provider
+      model: hosted_vllm/facebook/opt-125m  # add hosted_vllm/ prefix to route as OpenAI provider
      api_base: https://hosted-vllm-api.co   # add api base for OpenAI compatible provider
```
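To sanity-check the route above, here is a minimal sketch using the OpenAI SDK against the proxy; the address and the `sk-1234` key are assumptions, so substitute your own proxy URL and virtual key.

```python
# Hypothetical test call against a LiteLLM proxy started with the config above.
# Assumes the proxy listens on http://0.0.0.0:4000 with a virtual key "sk-1234".
from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://0.0.0.0:4000")

response = client.chat.completions.create(
    model="my-model",  # routes to hosted_vllm/facebook/opt-125m per the config
    messages=[{"role": "user", "content": "Hello from a hosted vLLM server!"}],
)
print(response.choices[0].message.content)
```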