fix(utils.py): adding support for anyscale models

Author: Krrish Dholakia
Date: 2023-10-25 09:08:03 -07:00
Parent: 9714b85cac
Commit: c1b2553827
5 changed files with 88 additions and 6 deletions


@@ -262,6 +262,7 @@ provider_list: List = [
"ollama",
"deepinfra",
"perplexity",
"anyscale",
"custom", # custom apis
]
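
With "anyscale" added to `provider_list`, the prefix is recognized anywhere litellm checks for supported providers. A minimal sanity-check sketch, assuming `provider_list` is importable from the top-level `litellm` package as the hunk header suggests:

```python
import litellm

# after this change, "anyscale" should be a recognized provider prefix
assert "anyscale" in litellm.provider_list
```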


@@ -950,7 +950,6 @@ def completion(
vertex_ai_location = (litellm.vertex_location
or get_secret("VERTEXAI_LOCATION"))
# palm does not support streaming as yet :(
model_response = vertex_ai.completion(
model=model,
messages=messages,


@@ -51,7 +51,7 @@ def test_completion_claude():
except Exception as e:
pytest.fail(f"Error occurred: {e}")
test_completion_claude()
# test_completion_claude()
# def test_completion_oobabooga():
# try:
@@ -129,7 +129,7 @@ def test_completion_perplexity_api():
except Exception as e:
pytest.fail(f"Error occurred: {e}")
test_completion_perplexity_api()
# test_completion_perplexity_api()
def test_completion_perplexity_api_2():
try:
@@ -151,7 +151,7 @@ def test_completion_perplexity_api_2():
print(response)
except Exception as e:
pytest.fail(f"Error occurred: {e}")
test_completion_perplexity_api_2()
# test_completion_perplexity_api_2()
# commenting out as this is a flaky test on circle ci
# def test_completion_nlp_cloud():
@@ -1031,7 +1031,7 @@ def test_completion_together_ai():
# for model in test_models:
# try:
# print("making request", model)
# response = completion(model="chat-bison", messages=[{'role': 'user', 'content': 'hi'}])
# response = completion(model="vertex_ai/codechat-bison-32k", messages=[{'role': 'user', 'content': 'hi'}])
# print(response)
# assert type(response.choices[0].message.content) == str
# except Exception as e:
@@ -1068,6 +1068,49 @@ def test_completion_with_fallbacks():
pytest.fail(f"Error occurred: {e}")
# test_completion_with_fallbacks()
def test_completion_anyscale_api():
    try:
        # litellm.set_verbose=True
        messages=[{
            "role": "system",
            "content": "You're a good bot"
        },{
            "role": "user",
            "content": "Hey",
        },{
            "role": "user",
            "content": "Hey",
        }]
        response = completion(
            model="anyscale/meta-llama/Llama-2-7b-chat-hf",
            messages=messages,)
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
# test_completion_anyscale_api()

def test_completion_anyscale_2():
    try:
        # litellm.set_verbose=True
        messages=[{
            "role": "system",
            "content": "You're a good bot"
        },{
            "role": "user",
            "content": "Hey",
        },{
            "role": "user",
            "content": "Hey",
        }]
        response = completion(
            model="anyscale/meta-llama/Llama-2-7b-chat-hf",
            messages=messages
        )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
test_completion_anyscale_2()
# def test_completion_with_fallbacks_multiple_keys():
# print(f"backup key 1: {os.getenv('BACKUP_OPENAI_API_KEY_1')}")
# print(f"backup key 2: {os.getenv('BACKUP_OPENAI_API_KEY_2')}")


@@ -1572,7 +1572,11 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None, api_
api_base = "https://api.perplexity.ai"
dynamic_api_key = os.getenv("PERPLEXITYAI_API_KEY")
custom_llm_provider = "custom_openai"
elif custom_llm_provider == "anyscale":
# anyscale is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.endpoints.anyscale.com/v1
api_base = "https://api.endpoints.anyscale.com/v1"
dynamic_api_key = os.getenv("ANYSCALE_API_KEY")
custom_llm_provider = "custom_openai"
return model, custom_llm_provider, dynamic_api_key, api_base
# check if api base is a known openai compatible endpoint
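
Since Anyscale's endpoint is OpenAI-compatible, the new branch only swaps in the base URL, the key, and the existing `custom_openai` provider; no new handler is needed. A sketch of what the resolution should return, assuming `get_llm_provider` is importable from `litellm.utils` (the module named in the commit title):

```python
import os
from litellm.utils import get_llm_provider  # import path assumed from the commit title (utils.py)

os.environ["ANYSCALE_API_KEY"] = "esecret_..."  # hypothetical placeholder

model, provider, key, base = get_llm_provider(
    model="meta-llama/Llama-2-7b-chat-hf",
    custom_llm_provider="anyscale",
)
# expected, per the branch above:
#   provider == "custom_openai"
#   base     == "https://api.endpoints.anyscale.com/v1"
#   key      == os.environ["ANYSCALE_API_KEY"]
print(provider, base)
```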


@@ -655,5 +655,40 @@
"output_cost_per_token": 0.000000,
"litellm_provider": "perplexity",
"mode": "chat"
},
"anyscale/meta-llama/Llama-2-7b-chat-hf": {
"max_tokens": 4096,
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000015,
"litellm_provider": "anyscale",
"mode": "chat"
},
"anyscale/mistralai/Mistral-7B-Instruct-v0.1": {
"max_tokens": 4096,
"input_cost_per_token": 0.00000015,
"output_cost_per_token": 0.00000015,
"litellm_provider": "anyscale",
"mode": "chat"
},
"anyscale/meta-llama/Llama-2-13b-chat-hf": {
"max_tokens": 4096,
"input_cost_per_token": 0.00000025,
"output_cost_per_token": 0.00000025,
"litellm_provider": "anyscale",
"mode": "chat"
},
"anyscale/meta-llama/Llama-2-70b-chat-hf": {
"max_tokens": 4096,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000001,
"litellm_provider": "anyscale",
"mode": "chat"
},
"anyscale/codellama/CodeLlama-34b-Instruct-hf": {
"max_tokens": 4096,
"input_cost_per_token": 0.000001,
"output_cost_per_token": 0.000001,
"litellm_provider": "anyscale",
"mode": "chat"
}
}
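
The per-token prices above make cost estimates simple arithmetic. A back-of-the-envelope sketch for the Llama-2-7b entry (plain Python, not a litellm API; the token counts are illustrative):

```python
# prices from the anyscale/meta-llama/Llama-2-7b-chat-hf entry above
input_cost_per_token = 0.00000015
output_cost_per_token = 0.00000015

prompt_tokens, completion_tokens = 1200, 300  # illustrative counts
cost = prompt_tokens * input_cost_per_token + completion_tokens * output_cost_per_token
print(f"${cost:.6f}")  # (1200 + 300) * $0.00000015 = $0.000225
```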