From 1ca4d6ff8cb373f2e36e351a9634bdd43a66f18a Mon Sep 17 00:00:00 2001
From: Krrish Dholakia
Date: Mon, 14 Aug 2023 14:47:59 -0700
Subject: [PATCH] adding docs

---
 .../docs/completion/huggingface_tutorial.md | 76 ++++++++++++++++++-
 1 file changed, 73 insertions(+), 3 deletions(-)

diff --git a/docs/my-website/docs/completion/huggingface_tutorial.md b/docs/my-website/docs/completion/huggingface_tutorial.md
index ea822dec56..c5e09c7bf6 100644
--- a/docs/my-website/docs/completion/huggingface_tutorial.md
+++ b/docs/my-website/docs/completion/huggingface_tutorial.md
@@ -24,10 +24,11 @@ completion(model=model, messages=messages, custom_llm_provider="huggingface")
 ```
 
 What's happening?
-- model - this is the name of the deployed model on huggingface
-- messages - this is the input. We accept the OpenAI chat format. For huggingface, by default we iterate through the list and add the message["content"] to the prompt.
+- model: This is the name of the deployed model on huggingface.
+- messages: This is the input. We accept the OpenAI chat format. For huggingface, by default we iterate through the list and add the message["content"] to the prompt.
 [Relevant Code](https://github.com/BerriAI/litellm/blob/6aff47083be659b80e00cb81eb783cb24db2e183/litellm/llms/huggingface_restapi.py#L46)
+- custom_llm_provider: Optional param. This flag is needed only for Azure, Replicate, Huggingface, and Together-ai (platforms where you deploy your own models). It enables LiteLLM to route the call to the right provider for your model.
 
-### Case 2: Call Llama2 public endpoint
+### Case 2: Call Llama2 public Huggingface endpoint
 
 We've deployed `meta-llama/Llama-2-7b-hf` behind a public endpoint - `https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud`.
@@ -43,3 +44,72 @@ custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.
 completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base)
 ```
 
+What's happening?
+- custom_api_base: Optional param. Since this uses a deployed endpoint (not the [default huggingface inference endpoint](https://github.com/BerriAI/litellm/blob/6aff47083be659b80e00cb81eb783cb24db2e183/litellm/llms/huggingface_restapi.py#L35)), we pass that to LiteLLM.
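+
+As noted in Case 1, LiteLLM's default behavior for huggingface is to flatten the `messages` list into a single prompt string before calling the endpoint. Here's a rough sketch of that default behavior - not the exact implementation, see the Relevant Code link above for that:
+```
+# rough sketch of the default prompt construction - not the exact implementation
+prompt = ""
+for message in messages:
+    prompt += message["content"]
+```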
+
+### Case 3: Call Llama2 private Huggingface endpoint
+
+The only difference between this and the public endpoint is that you need an `api_key`.
+
+With LiteLLM, there are 3 ways you can pass in an api_key: via environment variables, by setting it as a package variable, or by passing it in when calling `completion()`.
+
+**Setting via environment variables**
+Here's the 1 line of code you need to add:
+```
+os.environ["HF_TOKEN"] = "..."
+```
+
+Here's the full code:
+```
+import os
+from litellm import completion
+
+os.environ["HF_TOKEN"] = "..."
+
+model = "meta-llama/Llama-2-7b-hf"
+messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
+custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
+
+### CALLING ENDPOINT
+completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base)
+```
+
+**Setting it as package variable**
+Here's the 1 line of code you need to add:
+```
+litellm.huggingface_key = "..."
+```
+
+Here's the full code:
+```
+import litellm
+from litellm import completion
+
+litellm.huggingface_key = "..."
+
+model = "meta-llama/Llama-2-7b-hf"
+messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
+custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
+
+### CALLING ENDPOINT
+completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base)
+```
+
+**Passing it in during the completion call**
+```
+completion(..., api_key="...")
+```
+
+Here's the full code:
+
+```
+from litellm import completion
+
+model = "meta-llama/Llama-2-7b-hf"
+messages = [{"role": "user", "content": "Hey, how's it going?"}] # LiteLLM follows the OpenAI format
+custom_api_base = "https://ag3dkq4zui5nu8g3.us-east-1.aws.endpoints.huggingface.cloud"
+
+### CALLING ENDPOINT
+completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base, api_key="...")
+```
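+
+Whichever way you pass the api_key in, `completion()` returns a response in the OpenAI format. Here's a minimal usage sketch for reading the model's reply - this assumes the endpoint above is live and your key is valid:
+```
+response = completion(model=model, messages=messages, custom_llm_provider="huggingface", custom_api_base=custom_api_base, api_key="...")
+print(response["choices"][0]["message"]["content"]) # responses follow the OpenAI format
+```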