fix(utils.py): adding support for anyscale models
parent 9714b85cac
commit c1b2553827
5 changed files with 88 additions and 6 deletions
@@ -262,6 +262,7 @@ provider_list: List = [
    "ollama",
    "deepinfra",
    "perplexity",
    "anyscale",
    "custom", # custom apis
]
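Registering "anyscale" in provider_list is what lets the "anyscale/" model prefix select the new provider. A minimal usage sketch, assuming litellm is installed and ANYSCALE_API_KEY is set; the model name comes from the tests and pricing entries added in this commit, everything else is illustrative rather than part of the diff.

import os
from litellm import completion

os.environ["ANYSCALE_API_KEY"] = "my-anyscale-key"  # placeholder value for illustration

# The "anyscale/" prefix selects the new provider entry; the request is then sent to
# Anyscale's OpenAI-compatible endpoint (see the get_llm_provider hunk further below).
response = completion(
    model="anyscale/meta-llama/Llama-2-7b-chat-hf",
    messages=[{"role": "user", "content": "Hey"}],
)
print(response.choices[0].message.content)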
@@ -950,7 +950,6 @@ def completion(
        vertex_ai_location = (litellm.vertex_location
                              or get_secret("VERTEXAI_LOCATION"))

        # palm does not support streaming as yet :(
        model_response = vertex_ai.completion(
            model=model,
            messages=messages,
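The vertex_ai_location assignment above is a simple fallback: the module-level litellm.vertex_location wins, otherwise the value is pulled via get_secret. A small sketch of that behaviour, assuming get_secret is importable from litellm.utils and falls back to environment variables (both assumptions, not confirmed by this diff):

import os
import litellm
from litellm.utils import get_secret  # import path assumed for illustration

litellm.vertex_location = None                   # module-level setting left unset
os.environ["VERTEXAI_LOCATION"] = "us-central1"  # illustrative region

# Mirrors the expression in the hunk above: module setting first, secret/env var second.
vertex_ai_location = (litellm.vertex_location
                      or get_secret("VERTEXAI_LOCATION"))
print(vertex_ai_location)  # expected: "us-central1"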
@@ -51,7 +51,7 @@ def test_completion_claude():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

test_completion_claude()
# test_completion_claude()

# def test_completion_oobabooga():
#     try:
@@ -129,7 +129,7 @@ def test_completion_perplexity_api():
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

test_completion_perplexity_api()
# test_completion_perplexity_api()

def test_completion_perplexity_api_2():
    try:
@@ -151,7 +151,7 @@ def test_completion_perplexity_api_2():
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
test_completion_perplexity_api_2()
# test_completion_perplexity_api_2()

# commenting out as this is a flaky test on circle ci
# def test_completion_nlp_cloud():
@@ -1031,7 +1031,7 @@ def test_completion_together_ai():
# for model in test_models:
#     try:
#         print("making request", model)
#         response = completion(model="chat-bison", messages=[{'role': 'user', 'content': 'hi'}])
#         response = completion(model="vertex_ai/codechat-bison-32k", messages=[{'role': 'user', 'content': 'hi'}])
#         print(response)
#         assert type(response.choices[0].message.content) == str
#     except Exception as e:
@@ -1068,6 +1068,49 @@ def test_completion_with_fallbacks():
        pytest.fail(f"Error occurred: {e}")

# test_completion_with_fallbacks()
def test_completion_anyscale_api():
    try:
        # litellm.set_verbose=True
        messages=[{
            "role": "system",
            "content": "You're a good bot"
        },{
            "role": "user",
            "content": "Hey",
        },{
            "role": "user",
            "content": "Hey",
        }]
        response = completion(
            model="anyscale/meta-llama/Llama-2-7b-chat-hf",
            messages=messages,)
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")

# test_completion_anyscale_api()

def test_completion_anyscale_2():
    try:
        # litellm.set_verbose=True
        messages=[{
            "role": "system",
            "content": "You're a good bot"
        },{
            "role": "user",
            "content": "Hey",
        },{
            "role": "user",
            "content": "Hey",
        }]
        response = completion(
            model="anyscale/meta-llama/Llama-2-7b-chat-hf",
            messages=messages
        )
        print(response)
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")
test_completion_anyscale_2()
# def test_completion_with_fallbacks_multiple_keys():
#     print(f"backup key 1: {os.getenv('BACKUP_OPENAI_API_KEY_1')}")
#     print(f"backup key 2: {os.getenv('BACKUP_OPENAI_API_KEY_2')}")
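The two new anyscale tests only print the response. If a stricter check is wanted, the assertion pattern from the commented-out vertex block earlier in this file could be reused; a hedged sketch, with a test name that is hypothetical and not part of this commit:

import pytest
from litellm import completion

def test_completion_anyscale_content_is_str():  # hypothetical name, not in the commit
    try:
        response = completion(
            model="anyscale/meta-llama/Llama-2-7b-chat-hf",
            messages=[{"role": "user", "content": "Hey"}],
        )
        # Same shape check used in the test_completion_together_ai block above.
        assert type(response.choices[0].message.content) == str
    except Exception as e:
        pytest.fail(f"Error occurred: {e}")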
@@ -1572,7 +1572,11 @@ def get_llm_provider(model: str, custom_llm_provider: Optional[str] = None, api_
            api_base = "https://api.perplexity.ai"
            dynamic_api_key = os.getenv("PERPLEXITYAI_API_KEY")
            custom_llm_provider = "custom_openai"

        elif custom_llm_provider == "anyscale":
            # anyscale is openai compatible, we just need to set this to custom_openai and have the api_base be https://api.endpoints.anyscale.com/v1
            api_base = "https://api.endpoints.anyscale.com/v1"
            dynamic_api_key = os.getenv("ANYSCALE_API_KEY")
            custom_llm_provider = "custom_openai"
        return model, custom_llm_provider, dynamic_api_key, api_base

    # check if api base is a known openai compatible endpoint
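Put together, the new elif branch means an anyscale model resolves to the generic OpenAI-compatible path. A sketch of the expected resolution, assuming get_llm_provider is importable from litellm.utils (an assumption; only the function body is shown in this hunk):

import os
from litellm.utils import get_llm_provider  # import path assumed for illustration

os.environ["ANYSCALE_API_KEY"] = "my-anyscale-key"  # placeholder

model, provider, dynamic_api_key, api_base = get_llm_provider(
    model="meta-llama/Llama-2-7b-chat-hf", custom_llm_provider="anyscale"
)
# Per the branch above, this should yield:
#   provider        == "custom_openai"
#   api_base        == "https://api.endpoints.anyscale.com/v1"
#   dynamic_api_key == os.environ["ANYSCALE_API_KEY"]
print(provider, api_base)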
@@ -655,5 +655,40 @@
        "output_cost_per_token": 0.000000,
        "litellm_provider": "perplexity",
        "mode": "chat"
    },
    "anyscale/meta-llama/Llama-2-7b-chat-hf": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.00000015,
        "output_cost_per_token": 0.00000015,
        "litellm_provider": "anyscale",
        "mode": "chat"
    },
    "anyscale/mistralai/Mistral-7B-Instruct-v0.1": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.00000015,
        "output_cost_per_token": 0.00000015,
        "litellm_provider": "anyscale",
        "mode": "chat"
    },
    "anyscale/meta-llama/Llama-2-13b-chat-hf": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.00000025,
        "output_cost_per_token": 0.00000025,
        "litellm_provider": "anyscale",
        "mode": "chat"
    },
    "anyscale/meta-llama/Llama-2-70b-chat-hf": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000001,
        "output_cost_per_token": 0.000001,
        "litellm_provider": "anyscale",
        "mode": "chat"
    },
    "anyscale/codellama/CodeLlama-34b-Instruct-hf": {
        "max_tokens": 4096,
        "input_cost_per_token": 0.000001,
        "output_cost_per_token": 0.000001,
        "litellm_provider": "anyscale",
        "mode": "chat"
    }
}
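The pricing entries above are flat per-token rates, so a request's cost is a weighted sum of prompt and completion tokens. A minimal arithmetic sketch using the numbers from this hunk; litellm has its own cost-tracking utilities, this only shows what the JSON fields imply, and the token counts are made up for illustration.

# Cost arithmetic implied by the JSON fields above (illustrative only).
ANYSCALE_70B = {
    "input_cost_per_token": 0.000001,
    "output_cost_per_token": 0.000001,
}

def request_cost(prompt_tokens: int, completion_tokens: int, prices: dict) -> float:
    return (prompt_tokens * prices["input_cost_per_token"]
            + completion_tokens * prices["output_cost_per_token"])

# e.g. 1,000 prompt tokens + 200 completion tokens on Llama-2-70b-chat-hf:
# 1000 * 0.000001 + 200 * 0.000001 = 0.0012 (USD)
print(request_cost(1000, 200, ANYSCALE_70B))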