diff --git a/litellm/__init__.py b/litellm/__init__.py
index 29560a1aa..cb2b64e7b 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -262,6 +262,7 @@ provider_list: List = [
     "ollama",
     "deepinfra",
     "perplexity",
+    "anyscale",
     "custom", # custom apis
 ]
 
diff --git a/litellm/main.py b/litellm/main.py
index 46e9bee3a..7a7571583 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -950,7 +950,6 @@ def completion(
             vertex_ai_location = (litellm.vertex_location or get_secret("VERTEXAI_LOCATION"))
 
-            # palm does not support streaming as yet :(
             model_response = vertex_ai.completion(
                 model=model,
                 messages=messages,
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index ca5dba4b2..728055571 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -51,7 +51,7 @@ def test_completion_claude():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-test_completion_claude()
+# test_completion_claude()
 
 # def test_completion_oobabooga():
 #     try:
@@ -129,7 +129,7 @@ def test_completion_perplexity_api():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-test_completion_perplexity_api()
+# test_completion_perplexity_api()
 
 def test_completion_perplexity_api_2():
     try:
@@ -151,7 +151,7 @@ def test_completion_perplexity_api_2():
         print(response)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
-test_completion_perplexity_api_2()
+# test_completion_perplexity_api_2()
 
 # commenting out as this is a flaky test on circle ci
 # def test_completion_nlp_cloud():
@@ -1031,7 +1031,7 @@ def test_completion_together_ai():
 #     for model in test_models:
 #         try:
 #             print("making request", model)
-#             response = completion(model="chat-bison", messages=[{'role': 'user', 'content': 'hi'}])
+#             response = completion(model="vertex_ai/codechat-bison-32k", messages=[{'role': 'user', 'content': 'hi'}])
 #             print(response)
 #             assert type(response.choices[0].message.content) == str
 #         except Exception as e:
@@ -1068,6 +1068,49 @@ def test_completion_with_fallbacks():
         pytest.fail(f"Error occurred: {e}")
 
 # test_completion_with_fallbacks()
+def test_completion_anyscale_api():
+    try:
+        # litellm.set_verbose=True
+        messages=[{
+            "role": "system",
+            "content": "You're a good bot"
+        },{
+            "role": "user",
+            "content": "Hey",
+        },{
+            "role": "user",
+            "content": "Hey",
+        }]
+        response = completion(
+            model="anyscale/meta-llama/Llama-2-7b-chat-hf",
+            messages=messages,)
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+# test_completion_anyscale_api()
+
+def test_completion_anyscale_2():
+    try:
+        # litellm.set_verbose=True
+        messages=[{
+            "role": "system",
+            "content": "You're a good bot"
+        },{
+            "role": "user",
+            "content": "Hey",
+        },{
+            "role": "user",
+            "content": "Hey",
+        }]
+        response = completion(
+            model="anyscale/meta-llama/Llama-2-7b-chat-hf",
+            messages=messages
+        )
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+test_completion_anyscale_2()
 
 # def test_completion_with_fallbacks_multiple_keys():
 #     print(f"backup key 1: {os.getenv('BACKUP_OPENAI_API_KEY_1')}")
 #     print(f"backup key 2: {os.getenv('BACKUP_OPENAI_API_KEY_2')}")
diff --git a/litellm/utils.py b/litellm/utils.py
index 2566da916..4f6c9c3aa 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -1572,7 +1572,11 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None, api_
                 api_base = "https://api.perplexity.ai"
                 dynamic_api_key = os.getenv("PERPLEXITYAI_API_KEY")
                 custom_llm_provider = "custom_openai"
-
+            elif custom_llm_provider == "anyscale":
+                # anyscale is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.endpoints.anyscale.com/v1
+                api_base = "https://api.endpoints.anyscale.com/v1"
+                dynamic_api_key = os.getenv("ANYSCALE_API_KEY")
+                custom_llm_provider = "custom_openai"
             return model, custom_llm_provider, dynamic_api_key, api_base
 
         # check if api base is a known openai compatible endpoint
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index ec57144c0..84fc0b890 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -655,5 +655,40 @@
         "output_cost_per_token": 0.000000,
         "litellm_provider": "perplexity",
         "mode": "chat"
+    },
+    "anyscale/meta-llama/Llama-2-7b-chat-hf": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000015,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/mistralai/Mistral-7B-Instruct-v0.1": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000015,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/meta-llama/Llama-2-13b-chat-hf": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.00000025,
+        "output_cost_per_token": 0.00000025,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/meta-llama/Llama-2-70b-chat-hf": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000001,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/codellama/CodeLlama-34b-Instruct-hf": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000001,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
     }
 }