mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-25 10:44:24 +00:00)

Commit 3fbad7dfa7: add hf embedding models
Parent: f04d50d119
2 changed files with 100 additions and 16 deletions
@@ -266,6 +266,78 @@ def completion(
         }
     return model_response

-def embedding():
-    # logic for parsing in - calling - parsing out model embedding calls
-    pass
+def embedding(
+    model: str,
+    input: list,
+    api_key: str,
+    api_base: str,
+    logging_obj=None,
+    model_response=None,
+    encoding=None,
+):
+    headers = validate_environment(api_key)
+    # print_verbose(f"{model}, {task}")
+    embed_url = ""
+    if "https" in model:
+        embed_url = model
+    elif api_base:
+        embed_url = api_base
+    elif "HF_API_BASE" in os.environ:
+        embed_url = os.getenv("HF_API_BASE", "")
+    elif "HUGGINGFACE_API_BASE" in os.environ:
+        embed_url = os.getenv("HUGGINGFACE_API_BASE", "")
+    else:
+        embed_url = f"https://api-inference.huggingface.co/models/{model}"
+
+    data = {
+        "inputs": input
+    }
+
+    ## LOGGING
+    logging_obj.pre_call(
+        input=input,
+        api_key=api_key,
+        additional_args={"complete_input_dict": data},
+    )
+    ## EMBEDDING CALL
+    response = requests.post(
+        embed_url, headers=headers, data=json.dumps(data)
+    )
+
+    ## LOGGING
+    logging_obj.post_call(
+        input=input,
+        api_key=api_key,
+        additional_args={"complete_input_dict": data},
+        original_response=response,
+    )
+
+    embeddings = response.json()
+
+    output_data = []
+    for idx, embedding in enumerate(embeddings):
+        output_data.append(
+            {
+                "object": "embedding",
+                "index": idx,
+                "embedding": embedding[0][0],  # flatten nested list returned from hf
+            }
+        )
+    model_response["object"] = "list"
+    model_response["data"] = output_data
+    model_response["model"] = model
+    input_tokens = 0
+    for text in input:
+        input_tokens += len(encoding.encode(text))
+
+    model_response["usage"] = {
+        "prompt_tokens": input_tokens,
+        "total_tokens": input_tokens,
+    }
+    return model_response
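For context, a minimal sketch of how the new embedding() function might be called. The _NoOpLogger stub, the tiktoken encoding choice, and the model name are illustrative assumptions, not part of the commit; in litellm itself the caller passes its own logging object and model-response container.

# Hypothetical usage sketch for the embedding() function added above.
# Assumes a HuggingFace token in the environment and tiktoken installed;
# _NoOpLogger is an illustrative stand-in for litellm's logging object.
import os
import tiktoken


class _NoOpLogger:
    def pre_call(self, input, api_key, additional_args=None):
        pass

    def post_call(self, input, api_key, additional_args=None, original_response=None):
        pass


result = embedding(
    model="sentence-transformers/all-MiniLM-L6-v2",  # resolved to the HF inference URL
    input=["hello world", "goodbye world"],
    api_key=os.environ["HF_TOKEN"],
    api_base="",  # empty, so the default inference endpoint is used
    logging_obj=_NoOpLogger(),
    model_response={},  # the function fills in object/data/model/usage
    encoding=tiktoken.get_encoding("cl100k_base"),
)
print(result["model"], result["usage"])

One thing worth noting: the embedding[0][0] indexing appears to assume each input comes back from the HF feature-extraction endpoint wrapped in an extra batch dimension (roughly [1][tokens][dim]), so it picks out the first token's vector; a model whose endpoint already returns one flat vector per input would need a different flattening.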