diff --git a/README.md b/README.md index fe7d56b6c4..91b709442b 100644 --- a/README.md +++ b/README.md @@ -256,6 +256,7 @@ curl 'http://0.0.0.0:4000/key/generate' \ | [IBM - watsonx.ai](https://docs.litellm.ai/docs/providers/watsonx) | ✅ | ✅ | ✅ | ✅ | ✅ | | | [voyage ai](https://docs.litellm.ai/docs/providers/voyage) | | | | | ✅ | | | [xinference [Xorbits Inference]](https://docs.litellm.ai/docs/providers/xinference) | | | | | ✅ | | +| [FriendliAI](https://docs.litellm.ai/docs/providers/friendliai) | ✅ | ✅ | ✅ | ✅ | | | [**Read the Docs**](https://docs.litellm.ai/docs/) diff --git a/litellm/__init__.py b/litellm/__init__.py index a99ed20aa2..43ca239484 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -401,6 +401,7 @@ openai_compatible_endpoints: List = [ "api.groq.com/openai/v1", "api.deepseek.com/v1", "api.together.xyz/v1", + "inference.friendli.ai/v1", ] # this is maintained for Exception Mapping @@ -415,6 +416,7 @@ openai_compatible_providers: List = [ "xinference", "together_ai", "fireworks_ai", + "friendliai", "azure_ai", ] @@ -644,6 +646,7 @@ provider_list: List = [ "cloudflare", "xinference", "fireworks_ai", + "friendliai", "watsonx", "triton", "predibase", diff --git a/litellm/main.py b/litellm/main.py index b0fe59e62d..ae9b06b3a4 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -1180,7 +1180,7 @@ def completion( # note: if a user sets a custom base - we should ensure this works # allow for the setting of dynamic and stateful api-bases api_base = ( - api_base # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there + api_base # for deepinfra/perplexity/anyscale/groq/friendliai we check in get_llm_provider and pass in the api base from there or litellm.api_base or get_secret("OPENAI_API_BASE") or "https://api.openai.com/v1" @@ -1194,7 +1194,7 @@ def completion( # set API KEY api_key = ( api_key - or litellm.api_key # for deepinfra/perplexity/anyscale we check in get_llm_provider and pass in the api key from there + or litellm.api_key # for deepinfra/perplexity/anyscale/friendliai we check in get_llm_provider and pass in the api key from there or litellm.openai_key or get_secret("OPENAI_API_KEY") ) @@ -4401,7 +4401,7 @@ def speech( response: Optional[HttpxBinaryResponseContent] = None if custom_llm_provider == "openai": api_base = ( - api_base # for deepinfra/perplexity/anyscale/groq we check in get_llm_provider and pass in the api base from there + api_base # for deepinfra/perplexity/anyscale/groq/friendliai we check in get_llm_provider and pass in the api base from there or litellm.api_base or get_secret("OPENAI_API_BASE") or "https://api.openai.com/v1" diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 7438241543..ef07d87ccb 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -922,6 +922,36 @@ "mode": "chat", "supports_function_calling": true }, + "friendliai/mixtral-8x7b-instruct-v0-1": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.0000004, + "output_cost_per_token": 0.0000004, + "litellm_provider": "friendliai", + "mode": "chat", + "supports_function_calling": true + }, + "friendliai/meta-llama-3-8b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0000001, + "litellm_provider": "friendliai", + "mode": "chat", + "supports_function_calling": true + }, + "friendliai/meta-llama-3-70b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000008, + "output_cost_per_token": 0.0000008, + "litellm_provider": "friendliai", + "mode": "chat", + "supports_function_calling": true + }, "claude-instant-1.2": { "max_tokens": 8191, "max_input_tokens": 100000, diff --git a/litellm/utils.py b/litellm/utils.py index 37de566925..02b7bfd48f 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -4019,6 +4019,11 @@ def get_llm_provider( or get_secret("TOGETHERAI_API_KEY") or get_secret("TOGETHER_AI_TOKEN") ) + elif custom_llm_provider == "friendliai": + api_base = "https://inference.friendli.ai/v1" + dynamic_api_key = get_secret("FRIENDLIAI_API_KEY") or get_secret( + "FRIENDLI_TOKEN" + ) if api_base is not None and not isinstance(api_base, str): raise Exception( "api base needs to be a string. api_base={}".format(api_base) @@ -4072,6 +4077,11 @@ def get_llm_provider( elif endpoint == "api.deepseek.com/v1": custom_llm_provider = "deepseek" dynamic_api_key = get_secret("DEEPSEEK_API_KEY") + elif endpoint == "inference.friendli.ai/v1": + custom_llm_provider = "friendliai" + dynamic_api_key = get_secret( + "FRIENDLIAI_API_KEY" + ) or get_secret("FRIENDLI_TOKEN") if api_base is not None and not isinstance(api_base, str): raise Exception( diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 7438241543..ef07d87ccb 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -922,6 +922,36 @@ "mode": "chat", "supports_function_calling": true }, + "friendliai/mixtral-8x7b-instruct-v0-1": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "input_cost_per_token": 0.0000004, + "output_cost_per_token": 0.0000004, + "litellm_provider": "friendliai", + "mode": "chat", + "supports_function_calling": true + }, + "friendliai/meta-llama-3-8b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0000001, + "litellm_provider": "friendliai", + "mode": "chat", + "supports_function_calling": true + }, + "friendliai/meta-llama-3-70b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0000008, + "output_cost_per_token": 0.0000008, + "litellm_provider": "friendliai", + "mode": "chat", + "supports_function_calling": true + }, "claude-instant-1.2": { "max_tokens": 8191, "max_input_tokens": 100000,