forked from phoenix/litellm-mirror

fix(utils.py): adding support for anyscale models

parent 9714b85cac
commit c1b2553827

5 changed files with 88 additions and 6 deletions
@@ -262,6 +262,7 @@ provider_list: List = [
     "ollama",
     "deepinfra",
     "perplexity",
+    "anyscale",
     "custom", # custom apis
 ]
 
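With "anyscale" registered in provider_list, routing is driven by the "anyscale/" model prefix. A minimal usage sketch, assuming an Anyscale Endpoints key is available (the key value below is a placeholder; the model name is one of the entries added later in this commit):

import os
import litellm

# placeholder key for illustration only; a real Anyscale Endpoints key is required
os.environ["ANYSCALE_API_KEY"] = "esecret_..."

response = litellm.completion(
    model="anyscale/meta-llama/Llama-2-7b-chat-hf",  # the "anyscale/" prefix selects the new provider
    messages=[{"role": "user", "content": "Hey"}],
)
print(response)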
@@ -950,7 +950,6 @@ def completion(
         vertex_ai_location = (litellm.vertex_location
                               or get_secret("VERTEXAI_LOCATION"))
 
-        # palm does not support streaming as yet :(
         model_response = vertex_ai.completion(
             model=model,
             messages=messages,
@@ -51,7 +51,7 @@ def test_completion_claude():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-test_completion_claude()
+# test_completion_claude()
 
 # def test_completion_oobabooga():
 #     try:
@@ -129,7 +129,7 @@ def test_completion_perplexity_api():
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
-test_completion_perplexity_api()
+# test_completion_perplexity_api()
 
 def test_completion_perplexity_api_2():
     try:
@@ -151,7 +151,7 @@ def test_completion_perplexity_api_2():
         print(response)
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
-test_completion_perplexity_api_2()
+# test_completion_perplexity_api_2()
 
 # commenting out as this is a flaky test on circle ci
 # def test_completion_nlp_cloud():
@@ -1031,7 +1031,7 @@ def test_completion_together_ai():
 #     for model in test_models:
 #         try:
 #             print("making request", model)
-#             response = completion(model="chat-bison", messages=[{'role': 'user', 'content': 'hi'}])
+#             response = completion(model="vertex_ai/codechat-bison-32k", messages=[{'role': 'user', 'content': 'hi'}])
 #             print(response)
 #             assert type(response.choices[0].message.content) == str
 #         except Exception as e:
@@ -1068,6 +1068,49 @@ def test_completion_with_fallbacks():
         pytest.fail(f"Error occurred: {e}")
 
 # test_completion_with_fallbacks()
+def test_completion_anyscale_api():
+    try:
+        # litellm.set_verbose=True
+        messages=[{
+            "role": "system",
+            "content": "You're a good bot"
+        },{
+            "role": "user",
+            "content": "Hey",
+        },{
+            "role": "user",
+            "content": "Hey",
+        }]
+        response = completion(
+            model="anyscale/meta-llama/Llama-2-7b-chat-hf",
+            messages=messages,)
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+
+# test_completion_anyscale_api()
+
+def test_completion_anyscale_2():
+    try:
+        # litellm.set_verbose=True
+        messages=[{
+            "role": "system",
+            "content": "You're a good bot"
+        },{
+            "role": "user",
+            "content": "Hey",
+        },{
+            "role": "user",
+            "content": "Hey",
+        }]
+        response = completion(
+            model="anyscale/meta-llama/Llama-2-7b-chat-hf",
+            messages=messages
+        )
+        print(response)
+    except Exception as e:
+        pytest.fail(f"Error occurred: {e}")
+test_completion_anyscale_2()
 # def test_completion_with_fallbacks_multiple_keys():
 #     print(f"backup key 1: {os.getenv('BACKUP_OPENAI_API_KEY_1')}")
 #     print(f"backup key 2: {os.getenv('BACKUP_OPENAI_API_KEY_2')}")
@@ -1572,7 +1572,11 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None, api_
         api_base = "https://api.perplexity.ai"
         dynamic_api_key = os.getenv("PERPLEXITYAI_API_KEY")
         custom_llm_provider = "custom_openai"
+    elif custom_llm_provider == "anyscale":
+        # anyscale is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.endpoints.anyscale.com/v1
+        api_base = "https://api.endpoints.anyscale.com/v1"
+        dynamic_api_key = os.getenv("ANYSCALE_API_KEY")
+        custom_llm_provider = "custom_openai"
     return model, custom_llm_provider, dynamic_api_key, api_base
 
 # check if api base is a known openai compatible endpoint
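For reference, a sketch of what the new branch is expected to return; the import path is an assumption (the commit title points at utils.py), and the unpacking order follows the return statement shown above:

import os
from litellm.utils import get_llm_provider  # assumed import path

model, custom_llm_provider, dynamic_api_key, api_base = get_llm_provider(
    model="meta-llama/Llama-2-7b-chat-hf",
    custom_llm_provider="anyscale",
)
# per the new branch above, this should yield:
#   custom_llm_provider == "custom_openai"
#   api_base == "https://api.endpoints.anyscale.com/v1"
#   dynamic_api_key == os.getenv("ANYSCALE_API_KEY")
print(model, custom_llm_provider, api_base)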
@@ -655,5 +655,40 @@
         "output_cost_per_token": 0.000000,
         "litellm_provider": "perplexity",
         "mode": "chat"
+    },
+    "anyscale/meta-llama/Llama-2-7b-chat-hf": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000015,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/mistralai/Mistral-7B-Instruct-v0.1": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.00000015,
+        "output_cost_per_token": 0.00000015,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/meta-llama/Llama-2-13b-chat-hf": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.00000025,
+        "output_cost_per_token": 0.00000025,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/meta-llama/Llama-2-70b-chat-hf": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000001,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
+    },
+    "anyscale/codellama/CodeLlama-34b-Instruct-hf": {
+        "max_tokens": 4096,
+        "input_cost_per_token": 0.000001,
+        "output_cost_per_token": 0.000001,
+        "litellm_provider": "anyscale",
+        "mode": "chat"
     }
 }
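The per-token prices in this map are in USD, so cost tracking for an anyscale call reduces to a small multiplication over the fields added above. A minimal sketch of that arithmetic (the token counts are hypothetical illustration values):

# prices copied from the new map entries above (USD per token)
input_cost_per_token = 0.00000015   # anyscale/meta-llama/Llama-2-7b-chat-hf
output_cost_per_token = 0.00000015

# hypothetical token counts, for illustration only
prompt_tokens = 120
completion_tokens = 380

cost = (prompt_tokens * input_cost_per_token
        + completion_tokens * output_cost_per_token)
print(f"${cost:.8f}")  # -> $0.00007500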