forked from phoenix/litellm-mirror
add huggingface
parent daa949a539, commit 0ea59702fd
6 changed files with 70 additions and 3 deletions
docs/my-website/docs/providers/huggingface.md (new file, 56 additions)
@@ -0,0 +1,56 @@
import Image from '@theme/IdealImage';

# Huggingface

LiteLLM supports Huggingface Inference Endpoints. It uses the [text-generation-inference](https://github.com/huggingface/text-generation-inference) format. You can use any chat/text model from Hugging Face with the following steps:

* Copy your model id/url from Huggingface Inference Endpoints
  - [ ] Go to https://ui.endpoints.huggingface.co/
  - [ ] Copy the url of the specific model you'd like to use

<Image img={require('../../img/hf_inference_endpoint.png')} alt="HF_Dashboard" style={{ maxWidth: '50%', height: 'auto' }}/>

* Set it as your model name
* Set your HUGGINGFACE_API_KEY as an environment variable

Need help deploying a model on Huggingface? [Check out this guide.](https://huggingface.co/docs/inference-endpoints/guides/create_endpoint)

## Usage

In this case our model id is the same as the model url, `https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud`.

You need to tell LiteLLM that you're calling Huggingface. You can do that in two ways:

* By passing the custom llm provider as part of the model name: `completion(model="<custom_llm_provider>/<model_id>", ...)`

```python
import os
from litellm import completion

# Set env variables
os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key"

messages = [{"content": "There's a llama in my garden 😱 What should I do?", "role": "user"}]

# model = <custom_llm_provider>/<model_id>
response = completion(model="huggingface/https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud", messages=messages)

print(response)
```

* By passing a `custom_llm_provider` argument in the completion call

```python
import os
from litellm import completion

# Set env variables
os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key"

messages = [{"content": "There's a llama in my garden 😱 What should I do?", "role": "user"}]

response = completion(model="https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud", messages=messages, custom_llm_provider="huggingface")

print(response)
```

BIN docs/my-website/img/hf_inference_endpoint.png (new file, 160 KiB)
Binary file not shown.

@@ -30,6 +30,17 @@ const sidebars = {
       items: ["embedding/supported_embedding"],
     },
     'completion/supported',
+    {
+      type: "category",
+      label: "Providers",
+      link: {
+        type: 'generated-index',
+        title: 'Providers',
+        description: 'Learn how to deploy + call models from different providers on LiteLLM',
+        slug: '/providers',
+      },
+      items: ["providers/huggingface"],
+    },
     "token_usage",
     "exception_mapping",
     'debugging/hosted_debugging',

Binary file not shown.
@@ -46,8 +46,8 @@ class HuggingfaceRestAPILLM:
         logger_fn=None,
     ): # logic for parsing in - calling - parsing out model completion calls
         completion_url: str = ""
-        if custom_api_base:
-            completion_url = custom_api_base
+        if "https" in model:
+            completion_url = model
         elif "HF_API_BASE" in os.environ:
             completion_url = os.getenv("HF_API_BASE", "")
         else:
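In plain terms, after this change the endpoint URL is taken straight from the model name when it contains `https`, instead of from a separate `custom_api_base` argument. Below is a minimal sketch of the resulting resolution order; the `else` branch is cut off in this hunk, so its fallback to the hosted Inference API is an assumption, not shown in the diff:

```python
import os


def resolve_completion_url(model: str) -> str:
    # Sketch of the post-change logic in HuggingfaceRestAPILLM.completion().
    if "https" in model:
        # A full Inference Endpoint URL passed as the model name is used directly.
        return model
    elif "HF_API_BASE" in os.environ:
        # Otherwise an explicitly configured base URL takes over.
        return os.getenv("HF_API_BASE", "")
    else:
        # Assumption: the branch truncated in the hunk targets the hosted
        # Inference API for a plain model id; the real fallback is not shown here.
        return f"https://api-inference.huggingface.co/models/{model}"
```

This is also why the docs above and the updated test below pass `huggingface/https://...` as the model name and drop `custom_api_base`.
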
@@ -87,7 +87,7 @@ def test_completion_hf_deployed_api():
     try:
         user_message = "There's a llama in my garden 😱 What should I do?"
         messages = [{ "content": user_message,"role": "user"}]
-        response = completion(model="huggingface/WizardLM/WizardCoder-Python-34B-V1.0", messages=messages, custom_api_base="https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud", logger_fn=logger_fn)
+        response = completion(model="huggingface/https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud", messages=messages, logger_fn=logger_fn)
         # Add any assertions here to check the response
         print(response)
     except Exception as e: