diff --git a/docs/my-website/docs/providers/huggingface.md b/docs/my-website/docs/providers/huggingface.md
new file mode 100644
index 0000000000..330518bb33
--- /dev/null
+++ b/docs/my-website/docs/providers/huggingface.md
@@ -0,0 +1,56 @@
+import Image from '@theme/IdealImage';
+
+# Hugging Face
+
+LiteLLM supports Hugging Face Inference Endpoints. It uses the [text-generation-inference](https://github.com/huggingface/text-generation-inference) format. You can use any chat/text model from Hugging Face with the following steps:
+
+* Copy your model id/url from Hugging Face Inference Endpoints
+ - [ ] Go to https://ui.endpoints.huggingface.co/
+ - [ ] Copy the url of the specific model you'd like to use
+
+* Set it as your model name
+* Set your HUGGINGFACE_API_KEY as an environment variable
+
+Need help deploying a model on Hugging Face? [Check out this guide.](https://huggingface.co/docs/inference-endpoints/guides/create_endpoint)
+
+## Usage
+
+In this case our model id is the same as the model url - `https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud`
+
+<Image img={require('../../img/hf_inference_endpoint.png')} alt="Hugging Face Inference Endpoints" />
+
+You need to tell LiteLLM when you're calling Hugging Face. You can do that in 2 ways:
+
+* By passing in the custom llm provider as part of the model name -
+`completion(model="huggingface/<model-id-or-endpoint-url>", ...)`.
+
+```python
+import os
+from litellm import completion
+
+# Set env variables
+os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key"
+
+messages = [{ "content": "There's a llama in my garden 😱 What should I do?","role": "user"}]
+
+# model = huggingface/<your-deployed-endpoint-url>
+response = completion(model="huggingface/https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud", messages=messages)
+
+print(response)
+```
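+
+The response is returned in the OpenAI format, so the generated text can be read the same way as for any other LiteLLM provider. A minimal sketch:
+
+```python
+# responses follow the OpenAI format across providers
+print(response['choices'][0]['message']['content'])
+```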
+
+* By passing in a `custom_llm_provider` argument in the completion call
+
+```python
+import os
+from litellm import completion
+
+# Set env variables
+os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key"
+
+messages = [{ "content": "There's a llama in my garden 😱 What should I do?","role": "user"}]
+
+response = completion(model="https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud", messages=messages, custom_llm_provider="huggingface")
+print(response)
+```
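+
+LiteLLM will also pick up the endpoint url from the `HF_API_BASE` environment variable when the model name doesn't contain one (see `litellm/llms/huggingface_restapi.py`). A minimal sketch, assuming the same deployed endpoint:
+
+```python
+import os
+from litellm import completion
+
+os.environ["HUGGINGFACE_API_KEY"] = "huggingface_api_key"
+# the deployed endpoint url is read from HF_API_BASE when it isn't part of the model name
+os.environ["HF_API_BASE"] = "https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud"
+
+messages = [{ "content": "There's a llama in my garden 😱 What should I do?","role": "user"}]
+
+response = completion(model="huggingface/WizardLM/WizardCoder-Python-34B-V1.0", messages=messages)
+print(response)
+```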
\ No newline at end of file
diff --git a/docs/my-website/img/hf_inference_endpoint.png b/docs/my-website/img/hf_inference_endpoint.png
new file mode 100644
index 0000000000..22bc891088
Binary files /dev/null and b/docs/my-website/img/hf_inference_endpoint.png differ
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 6a6b102535..04b2b94359 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -30,6 +30,17 @@ const sidebars = {
items: ["embedding/supported_embedding"],
},
'completion/supported',
+ {
+ type: "category",
+ label: "Providers",
+ link: {
+ type: 'generated-index',
+ title: 'Providers',
+ description: 'Learn how to deploy + call models from different providers on LiteLLM',
+ slug: '/providers',
+ },
+ items: ["providers/huggingface"],
+ },
"token_usage",
"exception_mapping",
'debugging/hosted_debugging',
diff --git a/litellm/__pycache__/utils.cpython-311.pyc b/litellm/__pycache__/utils.cpython-311.pyc
index 5ac014955f..402182564a 100644
Binary files a/litellm/__pycache__/utils.cpython-311.pyc and b/litellm/__pycache__/utils.cpython-311.pyc differ
diff --git a/litellm/llms/huggingface_restapi.py b/litellm/llms/huggingface_restapi.py
index 50cd823c47..8d652cf671 100644
--- a/litellm/llms/huggingface_restapi.py
+++ b/litellm/llms/huggingface_restapi.py
@@ -46,8 +46,8 @@ class HuggingfaceRestAPILLM:
logger_fn=None,
): # logic for parsing in - calling - parsing out model completion calls
completion_url: str = ""
- if custom_api_base:
- completion_url = custom_api_base
+        if "https" in model:  # a full deployed endpoint url was passed in as the model
+ completion_url = model
elif "HF_API_BASE" in os.environ:
completion_url = os.getenv("HF_API_BASE", "")
else:
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index 51081436ce..31531678ca 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -87,7 +87,7 @@ def test_completion_hf_deployed_api():
try:
user_message = "There's a llama in my garden 😱 What should I do?"
messages = [{ "content": user_message,"role": "user"}]
- response = completion(model="huggingface/WizardLM/WizardCoder-Python-34B-V1.0", messages=messages, custom_api_base="https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud", logger_fn=logger_fn)
+ response = completion(model="huggingface/https://ji16r2iys9a8rjk2.us-east-1.aws.endpoints.huggingface.cloud", messages=messages, logger_fn=logger_fn)
# Add any assertions here to check the response
print(response)
except Exception as e: