diff --git a/docs/my-website/docs/providers/fireworks_ai.md b/docs/my-website/docs/providers/fireworks_ai.md
new file mode 100644
index 000000000..ba50bd1f2
--- /dev/null
+++ b/docs/my-website/docs/providers/fireworks_ai.md
@@ -0,0 +1,53 @@
+# Fireworks AI
+https://fireworks.ai/
+
+**We support ALL Fireworks AI models. Just set `fireworks_ai/` as a prefix when sending completion requests.**
+
+## API Key
+```python
+# env variable
+os.environ['FIREWORKS_AI_API_KEY']
+```
+
+## Sample Usage
+```python
+from litellm import completion
+import os
+
+os.environ['FIREWORKS_AI_API_KEY'] = ""
+response = completion(
+    model="fireworks_ai/mixtral-8x7b-instruct",
+    messages=[
+        {"role": "user", "content": "hello from litellm"}
+    ],
+)
+print(response)
+```
+
+## Sample Usage - Streaming
+```python
+from litellm import completion
+import os
+
+os.environ['FIREWORKS_AI_API_KEY'] = ""
+response = completion(
+    model="fireworks_ai/mixtral-8x7b-instruct",
+    messages=[
+        {"role": "user", "content": "hello from litellm"}
+    ],
+    stream=True
+)
+
+for chunk in response:
+    print(chunk)
+```
+
+
+## Supported Models - ALL Fireworks AI Models Supported!
+We support ALL Fireworks AI models. Just set `fireworks_ai/` as a prefix when sending completion requests.
+
+| Model Name            | Function Call                                                       |
+|-----------------------|---------------------------------------------------------------------|
+| mixtral-8x7b-instruct | `completion(model="fireworks_ai/mixtral-8x7b-instruct", messages)`  |
+| firefunction-v1       | `completion(model="fireworks_ai/firefunction-v1", messages)`        |
+| llama-v2-70b-chat     | `completion(model="fireworks_ai/llama-v2-70b-chat", messages)`      |
\ No newline at end of file
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index ae56f9d7c..21f66a778 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -138,6 +138,7 @@ const sidebars = {
         "providers/ollama",
         "providers/perplexity",
         "providers/groq",
+        "providers/fireworks_ai",
         "providers/vllm",
         "providers/xinference",
         "providers/cloudflare_workers",
diff --git a/litellm/__init__.py b/litellm/__init__.py
index 034c4ca03..7eae39097 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -328,6 +328,7 @@ openai_compatible_providers: List = [
     "perplexity",
     "xinference",
     "together_ai",
+    "fireworks_ai",
 ]
 
 
@@ -479,6 +480,7 @@ provider_list: List = [
     "voyage",
     "cloudflare",
     "xinference",
+    "fireworks_ai",
     "custom",  # custom apis
 ]
diff --git a/litellm/main.py b/litellm/main.py
index 8740ebba4..3a9fed77e 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -892,6 +892,7 @@ def completion(
             or custom_llm_provider == "mistral"
             or custom_llm_provider == "openai"
             or custom_llm_provider == "together_ai"
+            or custom_llm_provider in litellm.openai_compatible_providers
             or "ft:gpt-3.5-turbo" in model  # finetune gpt-3.5-turbo
         ):  # allow user to make an openai call with a custom base
             # note: if a user sets a custom base - we should ensure this works
@@ -2394,6 +2395,7 @@ async def aembedding(*args, **kwargs):
             or custom_llm_provider == "deepinfra"
             or custom_llm_provider == "perplexity"
             or custom_llm_provider == "groq"
+            or custom_llm_provider == "fireworks_ai"
             or custom_llm_provider == "ollama"
             or custom_llm_provider == "vertex_ai"
         ):  # currently implemented aiohttp calls for just azure and openai, soon all.
@@ -2893,6 +2895,7 @@ async def atext_completion(*args, **kwargs):
             or custom_llm_provider == "deepinfra"
             or custom_llm_provider == "perplexity"
             or custom_llm_provider == "groq"
+            or custom_llm_provider == "fireworks_ai"
             or custom_llm_provider == "text-completion-openai"
             or custom_llm_provider == "huggingface"
             or custom_llm_provider == "ollama"
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index bfc9dfb71..3d95e66e7 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -529,6 +529,25 @@ def test_completion_azure_gpt4_vision():
 
 
 # test_completion_azure_gpt4_vision()
+def test_completion_fireworks_ai():
+    try:
+        litellm.set_verbose = True
+        messages = [
+            {"role": "system", "content": "You're a good bot"},
+            {
+                "role": "user",
+                "content": "Hey",
+            },
+        ]
+        response = completion(
+            model="fireworks_ai/accounts/fireworks/models/mixtral-8x7b-instruct",
+            messages=messages,
+        )
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+
 @pytest.mark.skip(reason="this test is flaky")
 def test_completion_perplexity_api():
     try:
diff --git a/litellm/utils.py b/litellm/utils.py
index a77a662e3..7ad4107a9 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -5375,6 +5375,17 @@ def get_llm_provider(
         # groq is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.groq.com/openai/v1
         api_base = "https://api.groq.com/openai/v1"
         dynamic_api_key = get_secret("GROQ_API_KEY")
+    elif custom_llm_provider == "fireworks_ai":
+        # fireworks is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.fireworks.ai/inference/v1
+        if not model.startswith("accounts/fireworks/models"):
+            model = f"accounts/fireworks/models/{model}"
+        api_base = "https://api.fireworks.ai/inference/v1"
+        dynamic_api_key = (
+            get_secret("FIREWORKS_API_KEY")
+            or get_secret("FIREWORKS_AI_API_KEY")
+            or get_secret("FIREWORKSAI_API_KEY")
+            or get_secret("FIREWORKS_AI_TOKEN")
+        )
     elif custom_llm_provider == "mistral":
         # mistral is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.mistral.ai
         api_base = (
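Reviewer note: below is a minimal, illustrative sketch of what the new `fireworks_ai` branch in `get_llm_provider` amounts to at call time. It is not part of the patch; the helper name `resolve_fireworks_ai` is hypothetical, and it assumes the `fireworks_ai/` routing prefix has already been split off the model name, which `get_llm_provider` does before reaching this branch.

```python
# Hypothetical standalone sketch (not part of this patch) mirroring the
# fireworks_ai branch added to get_llm_provider() in litellm/utils.py.
import os
from typing import Optional, Tuple


def resolve_fireworks_ai(model: str) -> Tuple[str, str, Optional[str]]:
    # Assumes the "fireworks_ai/" routing prefix was already stripped upstream.
    # Fireworks expects fully qualified model ids, so prepend the account path
    # when the caller passed a short name like "mixtral-8x7b-instruct".
    if not model.startswith("accounts/fireworks/models"):
        model = f"accounts/fireworks/models/{model}"
    # OpenAI-compatible endpoint used for all fireworks_ai requests.
    api_base = "https://api.fireworks.ai/inference/v1"
    # Any of the supported environment variable names can supply the key.
    api_key = (
        os.getenv("FIREWORKS_API_KEY")
        or os.getenv("FIREWORKS_AI_API_KEY")
        or os.getenv("FIREWORKSAI_API_KEY")
        or os.getenv("FIREWORKS_AI_TOKEN")
    )
    return model, api_base, api_key


if __name__ == "__main__":
    # The short and fully qualified model names resolve to the same id.
    print(resolve_fireworks_ai("mixtral-8x7b-instruct"))
    print(resolve_fireworks_ai("accounts/fireworks/models/mixtral-8x7b-instruct"))
```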