diff --git a/docs/my-website/docs/providers/huggingface.md b/docs/my-website/docs/providers/huggingface.md
new file mode 100644
index 0000000000..330518bb33
--- /dev/null
+++ b/docs/my-website/docs/providers/huggingface.md
@@ -0,0 +1,56 @@
+import Image from '@theme/IdealImage';
+
+# Huggingface
+
+LiteLLM supports Huggingface Inference Endpoints. It uses the [text-generation-inference](https://github.com/huggingface/text-generation-inference) format. You can use any chat/text model from Hugging Face with the following steps:
+
+* Copy your model id/url from Huggingface Inference Endpoints
+    - [ ] Go to https://ui.endpoints.huggingface.co/
+    - [ ] Copy the url of the specific model you'd like to use
+    <Image img={require('../../img/hf_inference_endpoint.png')} alt="HF_Dashboard"/>
+* Set it as your model name
+* Set your HUGGINGFACE_API_KEY as an environment variable
+
+Need help deploying a model on Hugging Face? [Check out this guide.](https://huggingface.co/docs/inference-endpoints/guides/create_endpoint)
+
+## Usage
+
+In this case, our model id is the same as the model url: `https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud`
+
+You need to tell LiteLLM that you're calling Huggingface. You can do that in two ways:
+
+* By passing the custom LLM provider as part of the model name: `completion(model="<custom_llm_provider>/<model>", ...)`
+
+```python
+import os
+from litellm import completion
+
+# Set env variables
+os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key"
+
+messages = [{"content": "There's a llama in my garden 😱 What should I do?", "role": "user"}]
+
+# model = <custom_llm_provider>/<model>
+response = completion(model="huggingface/https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud", messages=messages)
+
+print(response)
+```
+
+* By passing a `custom_llm_provider` argument in the completion call
+
+```python
+import os
+from litellm import completion
+
+# Set env variables
+os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key"
+
+messages = [{"content": "There's a llama in my garden 😱 What should I do?", "role": "user"}]
+
+response = completion(model="https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud", messages=messages, custom_llm_provider="huggingface")
+
+print(response)
+```
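+
+Based on the endpoint-resolution logic in `litellm/llms/huggingface_restapi.py`, you can also keep a plain model id in the model name and point LiteLLM at your endpoint with an `HF_API_BASE` environment variable. A minimal sketch, assuming the model id used in the test suite (illustrative only):
+
+```python
+import os
+from litellm import completion
+
+# Set env variables
+os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key"
+# With no URL embedded in the model name, LiteLLM reads the endpoint from HF_API_BASE
+os.environ["HF_API_BASE"] = "https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud"
+
+messages = [{"content": "There's a llama in my garden 😱 What should I do?", "role": "user"}]
+
+response = completion(model="huggingface/WizardLM/WizardCoder-Python-34B-V1.0", messages=messages)
+print(response)
+```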
\ No newline at end of file
diff --git a/docs/my-website/img/hf_inference_endpoint.png b/docs/my-website/img/hf_inference_endpoint.png
new file mode 100644
index 0000000000..22bc891088
Binary files /dev/null and b/docs/my-website/img/hf_inference_endpoint.png differ
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 6a6b102535..04b2b94359 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -30,6 +30,17 @@ const sidebars = {
       items: ["embedding/supported_embedding"],
     },
     'completion/supported',
+    {
+      type: "category",
+      label: "Providers",
+      link: {
+        type: 'generated-index',
+        title: 'Providers',
+        description: 'Learn how to deploy + call models from different providers on LiteLLM',
+        slug: '/providers',
+      },
+      items: ["providers/huggingface"],
+    },
     "token_usage",
     "exception_mapping",
     'debugging/hosted_debugging',
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 5ac014955f..402182564a 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/llms/huggingface_restapi.py b/litellm/llms/huggingface_restapi.py
index 50cd823c47..8d652cf671 100644
--- a/litellm/llms/huggingface_restapi.py
+++ b/litellm/llms/huggingface_restapi.py
@@ -46,8 +46,8 @@ class HuggingfaceRestAPILLM:
         logger_fn=None,
     ):  # logic for parsing in - calling - parsing out model completion calls
         completion_url: str = ""
-        if custom_api_base:
-            completion_url = custom_api_base
+        if "https" in model:
+            completion_url = model
         elif "HF_API_BASE" in os.environ:
             completion_url = os.getenv("HF_API_BASE", "")
         else:
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 51081436ce..31531678ca 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -87,7 +87,7 @@ def test_completion_hf_deployed_api():
     try:
         user_message = "There's a llama in my garden 😱 What should I do?"
         messages = [{ "content": user_message,"role": "user"}]
-        response = completion(model="huggingface/WizardLM/WizardCoder-Python-34B-V1.0", messages=messages, custom_api_base="https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud", logger_fn=logger_fn)
+        response = completion(model="huggingface/https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud", messages=messages, logger_fn=logger_fn)
         # Add any assertions here to check the response
         print(response)
     except Exception as e:
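
For reference, the endpoint-resolution order introduced in `litellm/llms/huggingface_restapi.py` above can be sketched as a standalone function. This is a minimal sketch with a hypothetical helper name, not part of the library:

```python
import os

def resolve_completion_url(model: str) -> str:
    # New behavior: a model name that embeds a full endpoint URL is used directly
    if "https" in model:
        return model
    # Otherwise fall back to the HF_API_BASE environment variable
    if "HF_API_BASE" in os.environ:
        return os.getenv("HF_API_BASE", "")
    # An empty string means no dedicated endpoint was configured
    return ""

print(resolve_completion_url("https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud"))
```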