forked from phoenix/litellm-mirror
add huggingface
parent daa949a539, commit 0ea59702fd
6 changed files with 70 additions and 3 deletions
docs/my-website/docs/providers/huggingface.md (new file, 56 additions)
@@ -0,0 +1,56 @@
import Image from '@theme/IdealImage';

# Huggingface

LiteLLM supports Huggingface Inference Endpoints. It uses the [text-generation-inference](https://github.com/huggingface/text-generation-inference) format. You can use any chat/text model from Hugging Face with the following steps:

* Copy your model id/url from Huggingface Inference Endpoints
  - [ ] Go to https://ui.endpoints.huggingface.co/
  - [ ] Copy the url of the specific model you'd like to use

<Image img={require('../../img/hf_inference_endpoint.png')} alt="HF_Dashboard" style={{ maxWidth: '50%', height: 'auto' }}/>

* Set it as your model name
* Set your HUGGINGFACE_API_KEY as an environment variable

Need help deploying a model on Huggingface? [Check out this guide.](https://huggingface.co/docs/inference-endpoints/guides/create_endpoint)

## Usage

In this case our model id is the same as the model url, `https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud`.

You need to tell LiteLLM that you're calling Huggingface. You can do that in two ways:

* By passing the custom llm provider as part of the model name: `completion(model="<custom_llm_provider>/<model_id>", ...)`

```python
import os
from litellm import completion

# Set env variables
os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key"

messages = [{"content": "There's a llama in my garden 😱 What should I do?", "role": "user"}]

# model = <custom_llm_provider>/<model_id>
response = completion(model="huggingface/https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud", messages=messages)

print(response)
```

* By passing a `custom_llm_provider` argument in the completion call

```python
import os
from litellm import completion

# Set env variables
os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key"

messages = [{"content": "There's a llama in my garden 😱 What should I do?", "role": "user"}]

response = completion(model="https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud", messages=messages, custom_llm_provider="huggingface")

print(response)
```

BIN docs/my-website/img/hf_inference_endpoint.png (new file, 160 KiB)
Binary file not shown.

@@ -30,6 +30,17 @@ const sidebars = {
       items: ["embedding/supported_embedding"],
     },
     'completion/supported',
+    {
+      type: "category",
+      label: "Providers",
+      link: {
+        type: 'generated-index',
+        title: 'Providers',
+        description: 'Learn how to deploy + call models from different providers on LiteLLM',
+        slug: '/providers',
+      },
+      items: ["providers/huggingface"],
+    },
     "token_usage",
     "exception_mapping",
     'debugging/hosted_debugging',

Binary file not shown.
@@ -46,8 +46,8 @@ class HuggingfaceRestAPILLM:
         logger_fn=None,
     ): # logic for parsing in - calling - parsing out model completion calls
         completion_url: str = ""
-        if custom_api_base:
-            completion_url = custom_api_base
+        if "https" in model:
+            completion_url = model
         elif "HF_API_BASE" in os.environ:
             completion_url = os.getenv("HF_API_BASE", "")
         else:
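In plain terms, after this change the endpoint URL is taken straight from the model name when it contains `https`, instead of from a separate `custom_api_base` argument. Below is a minimal sketch of the resulting resolution order; the `else` branch is cut off in this hunk, so its fallback to the hosted Inference API is an assumption, not shown in the diff:

```python
import os


def resolve_completion_url(model: str) -> str:
    # Sketch of the post-change logic in HuggingfaceRestAPILLM.completion().
    if "https" in model:
        # A full Inference Endpoint URL passed as the model name is used directly.
        return model
    elif "HF_API_BASE" in os.environ:
        # Otherwise an explicitly configured base URL takes over.
        return os.getenv("HF_API_BASE", "")
    else:
        # Assumption: the branch truncated in the hunk targets the hosted
        # Inference API for a plain model id; the real fallback is not shown here.
        return f"https://api-inference.huggingface.co/models/{model}"
```

This is also why the docs above and the updated test below pass `huggingface/https://...` as the model name and drop `custom_api_base`.
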
@@ -87,7 +87,7 @@ def test_completion_hf_deployed_api():
     try:
         user_message = "There's a llama in my garden 😱 What should I do?"
         messages = [{ "content": user_message,"role": "user"}]
-        response = completion(model="huggingface/WizardLM/WizardCoder-Python-34B-V1.0", messages=messages, custom_api_base="https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud", logger_fn=logger_fn)
+        response = completion(model="huggingface/https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud", messages=messages, logger_fn=logger_fn)
         # Add any assertions here to check the response
         print(response)
     except Exception as e: