diff --git a/litellm/__init__.py b/litellm/__init__.py
index 29560a1aa..cb2b64e7b 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -262,6 +262,7 @@ provider_list: List = [
     "ollama",
     "deepinfra",
     "perplexity",
+    "anyscale",
     "custom", # custom apis
 ]
 
diff --git a/litellm/main.py b/litellm/main.py
index 46e9bee3a..7a7571583 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -950,7 +950,6 @@ def completion(
             vertex_ai_location = (litellm.vertex_location or get_secret("VERTEXAI_LOCATION"))
 
-            # palm does not support streaming as yet :(
             model_response = vertex_ai.completion(
                 model=model,
                 messages=messages,
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index ca5dba4b2..728055571 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -51,7 +51,7 @@ def test_completion_claude():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-test_completion_claude()
+# test_completion_claude()
 
 # def test_completion_oobabooga():
 #     try:
@@ -129,7 +129,7 @@ def test_completion_perplexity_api():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-test_completion_perplexity_api()
+# test_completion_perplexity_api()
 
 def test_completion_perplexity_api_2():
     try:
@@ -151,7 +151,7 @@ def test_completion_perplexity_api_2():
         print(response)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
-test_completion_perplexity_api_2()
+# test_completion_perplexity_api_2()
 
 # commenting out as this is a flaky test on circle ci
 # def test_completion_nlp_cloud():
@@ -1031,7 +1031,7 @@ def test_completion_together_ai():
 #     for model in test_models:
 #         try:
 #             print("making request", model)
-#             response = completion(model="chat-bison", messages=[{'role': 'user', 'content': 'hi'}])
+#             response = completion(model="vertex_ai/codechat-bison-32k", messages=[{'role': 'user', 'content': 'hi'}])
 #             print(response)
 #             assert type(response.choices[0].message.content) == str
 #         except Exception as e:
@@ -1068,6 +1068,49 @@ def test_completion_with_fallbacks():
         pytest.fail(f"Error occurred: {e}")
 
 # test_completion_with_fallbacks()
+def test_completion_anyscale_api():
+    try:
+        # litellm.set_verbose=True
+        messages=[{
+            "role": "system",
+            "content": "You're a good bot"
+        },{
+            "role": "user",
+            "content": "Hey",
+        },{
+            "role": "user",
+            "content": "Hey",
+        }]
+        response = completion(
+            model="anyscale/meta-llama/Llama-2-7b-chat-hf",
+            messages=messages,)
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+# test_completion_anyscale_api()
+
+def test_completion_anyscale_2():
+    try:
+        # litellm.set_verbose=True
+        messages=[{
+            "role": "system",
+            "content": "You're a good bot"
+        },{
+            "role": "user",
+            "content": "Hey",
+        },{
+            "role": "user",
+            "content": "Hey",
+        }]
+        response = completion(
+            model="anyscale/meta-llama/Llama-2-7b-chat-hf",
+            messages=messages
+        )
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+test_completion_anyscale_2()
 
 # def test_completion_with_fallbacks_multiple_keys():
 #     print(f"backup key 1: {os.getenv('BACKUP_OPENAI_API_KEY_1')}")
 #     print(f"backup key 2: {os.getenv('BACKUP_OPENAI_API_KEY_2')}")
diff --git a/litellm/utils.py b/litellm/utils.py
index 2566da916..4f6c9c3aa 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1572,7 +1572,11 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None, api_
                 api_base = "https://api.perplexity.ai"
                 dynamic_api_key = os.getenv("PERPLEXITYAI_API_KEY")
                 custom_llm_provider = "custom_openai"
-
+            elif custom_llm_provider == "anyscale":
+                # anyscale is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.endpoints.anyscale.com/v1
+                api_base = "https://api.endpoints.anyscale.com/v1"
+                dynamic_api_key = os.getenv("ANYSCALE_API_KEY")
+                custom_llm_provider = "custom_openai"
             return model, custom_llm_provider, dynamic_api_key, api_base
 
         # check if api base is a known openai compatible endpoint
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index ec57144c0..84fc0b890 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -655,5 +655,40 @@
         "output_cost_per_token": 0.000000,
         "litellm_provider": "perplexity",
         "mode": "chat"
+    },
+    "anyscale/meta-llama/Llama-2-7b-chat-hf": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000015,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/mistralai/Mistral-7B-Instruct-v0.1": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000015,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/meta-llama/Llama-2-13b-chat-hf": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.00000025,
+        "output_cost_per_token": 0.00000025,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/meta-llama/Llama-2-70b-chat-hf": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000001,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/codellama/CodeLlama-34b-Instruct-hf": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000001,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
     }
 }