forked from phoenix/litellm-mirror

fix(utils.py): adding support for anyscale models

parent 9714b85cac
commit c1b2553827

5 changed files with 88 additions and 6 deletions
@@ -262,6 +262,7 @@ provider_list: List = [
     "ollama",
     "deepinfra",
     "perplexity",
+    "anyscale",
     "custom", # custom apis
 ]
 
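With "anyscale" registered in provider_list, routing is driven by the "anyscale/" model prefix. A minimal usage sketch, assuming an Anyscale Endpoints key is available (the key value below is a placeholder; the model name is one of the entries added later in this commit):

import os
import litellm

# placeholder key for illustration only; a real Anyscale Endpoints key is required
os.environ["ANYSCALE_API_KEY"] = "esecret_..."

response = litellm.completion(
    model="anyscale/meta-llama/Llama-2-7b-chat-hf",  # the "anyscale/" prefix selects the new provider
    messages=[{"role": "user", "content": "Hey"}],
)
print(response)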
@@ -950,7 +950,6 @@ def completion(
         vertex_ai_location = (litellm.vertex_location
                               or get_secret("VERTEXAI_LOCATION"))
 
-        # palm does not support streaming as yet :(
         model_response = vertex_ai.completion(
             model=model,
             messages=messages,
@@ -51,7 +51,7 @@ def test_completion_claude():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-test_completion_claude()
+# test_completion_claude()
 
 # def test_completion_oobabooga():
 #     try:
@@ -129,7 +129,7 @@ def test_completion_perplexity_api():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-test_completion_perplexity_api()
+# test_completion_perplexity_api()
 
 def test_completion_perplexity_api_2():
     try:
@@ -151,7 +151,7 @@ def test_completion_perplexity_api_2():
         print(response)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
-test_completion_perplexity_api_2()
+# test_completion_perplexity_api_2()
 
 # commenting out as this is a flaky test on circle ci
 # def test_completion_nlp_cloud():
@@ -1031,7 +1031,7 @@ def test_completion_together_ai():
 #     for model in test_models:
 #         try:
 #             print("making request", model)
-#             response = completion(model="chat-bison", messages=[{'role': 'user', 'content': 'hi'}])
+#             response = completion(model="vertex_ai/codechat-bison-32k", messages=[{'role': 'user', 'content': 'hi'}])
 #             print(response)
 #             assert type(response.choices[0].message.content) == str
 #         except Exception as e:
@@ -1068,6 +1068,49 @@ def test_completion_with_fallbacks():
         pytest.fail(f"Error occurred: {e}")
 
 # test_completion_with_fallbacks()
+def test_completion_anyscale_api():
+    try:
+        # litellm.set_verbose=True
+        messages=[{
+            "role": "system",
+            "content": "You're a good bot"
+        },{
+            "role": "user",
+            "content": "Hey",
+        },{
+            "role": "user",
+            "content": "Hey",
+        }]
+        response = completion(
+            model="anyscale/meta-llama/Llama-2-7b-chat-hf",
+            messages=messages,)
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+# test_completion_anyscale_api()
+
+def test_completion_anyscale_2():
+    try:
+        # litellm.set_verbose=True
+        messages=[{
+            "role": "system",
+            "content": "You're a good bot"
+        },{
+            "role": "user",
+            "content": "Hey",
+        },{
+            "role": "user",
+            "content": "Hey",
+        }]
+        response = completion(
+            model="anyscale/meta-llama/Llama-2-7b-chat-hf",
+            messages=messages
+        )
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+test_completion_anyscale_2()
 # def test_completion_with_fallbacks_multiple_keys():
 #     print(f"backup key 1: {os.getenv('BACKUP_OPENAI_API_KEY_1')}")
 #     print(f"backup key 2: {os.getenv('BACKUP_OPENAI_API_KEY_2')}")
@@ -1572,7 +1572,11 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None, api_
         api_base = "https://api.perplexity.ai"
         dynamic_api_key = os.getenv("PERPLEXITYAI_API_KEY")
         custom_llm_provider = "custom_openai"
+    elif custom_llm_provider == "anyscale":
+        # anyscale is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.endpoints.anyscale.com/v1
+        api_base = "https://api.endpoints.anyscale.com/v1"
+        dynamic_api_key = os.getenv("ANYSCALE_API_KEY")
+        custom_llm_provider = "custom_openai"
     return model, custom_llm_provider, dynamic_api_key, api_base
 
 # check if api base is a known openai compatible endpoint
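For reference, a sketch of what the new branch is expected to return; the import path is an assumption (the commit title points at utils.py), and the unpacking order follows the return statement shown above:

import os
from litellm.utils import get_llm_provider  # assumed import path

model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(
    model="meta-llama/Llama-2-7b-chat-hf",
    custom_llm_provider="anyscale",
)
# per the new branch above, this should yield:
#   custom_llm_provider == "custom_openai"
#   api_base == "https://api.endpoints.anyscale.com/v1"
#   dynamic_api_key == os.getenv("ANYSCALE_API_KEY")
print(model, custom_llm_provider, api_base)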
@@ -655,5 +655,40 @@
         "output_cost_per_token": 0.000000,
         "litellm_provider": "perplexity",
         "mode": "chat"
+    },
+    "anyscale/meta-llama/Llama-2-7b-chat-hf": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000015,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/mistralai/Mistral-7B-Instruct-v0.1": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000015,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/meta-llama/Llama-2-13b-chat-hf": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.00000025,
+        "output_cost_per_token": 0.00000025,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/meta-llama/Llama-2-70b-chat-hf": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000001,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/codellama/CodeLlama-34b-Instruct-hf": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000001,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
     }
 }
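The per-token prices in this map are in USD, so cost tracking for an anyscale call reduces to a small multiplication over the fields added above. A minimal sketch of that arithmetic (the token counts are hypothetical illustration values):

# prices copied from the new map entries above (USD per token)
input_cost_per_token = 0.00000015   # anyscale/meta-llama/Llama-2-7b-chat-hf
output_cost_per_token = 0.00000015

# hypothetical token counts, for illustration only
prompt_tokens = 120
completion_tokens = 380

cost = (prompt_tokens * input_cost_per_token
        + completion_tokens * output_cost_per_token)
print(f"${cost:.8f}")  # -> $0.00007500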