forked from phoenix/litellm-mirror
support default fallback models
This commit is contained in:
parent
31c995a1a4
commit
ebd763287a
5 changed files with 61 additions and 33 deletions
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -14,6 +14,7 @@ from litellm import completion_with_config
|
||||||
|
|
||||||
config = {
|
config = {
|
||||||
"function": "completion",
|
"function": "completion",
|
||||||
|
"default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
|
||||||
"model": {
|
"model": {
|
||||||
"claude-instant-1": {
|
"claude-instant-1": {
|
||||||
"needs_moderation": True
|
"needs_moderation": True
|
||||||
|
@ -26,12 +27,20 @@ config = {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
def test_config():
|
def test_config_context_window_exceeded():
|
||||||
try:
|
try:
|
||||||
sample_text = "how does a court case get to the Supreme Court?" * 1000
|
sample_text = "how does a court case get to the Supreme Court?" * 1000
|
||||||
messages = [{"content": sample_text, "role": "user"}]
|
messages = [{"content": sample_text, "role": "user"}]
|
||||||
response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
|
response = completion_with_config(model="gpt-3.5-turbo", messages=messages, config=config)
|
||||||
print(response)
|
print(response)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Exception: {e}")
|
||||||
|
pytest.fail(f"An exception occurred: {e}")
|
||||||
|
|
||||||
|
# test_config_context_window_exceeded()
|
||||||
|
|
||||||
|
def test_config_context_moderation():
|
||||||
|
try:
|
||||||
messages=[{"role": "user", "content": "I want to kill them."}]
|
messages=[{"role": "user", "content": "I want to kill them."}]
|
||||||
response = completion_with_config(model="claude-instant-1", messages=messages, config=config)
|
response = completion_with_config(model="claude-instant-1", messages=messages, config=config)
|
||||||
print(response)
|
print(response)
|
||||||
|
@ -39,4 +48,15 @@ def test_config():
|
||||||
print(f"Exception: {e}")
|
print(f"Exception: {e}")
|
||||||
pytest.fail(f"An exception occurred: {e}")
|
pytest.fail(f"An exception occurred: {e}")
|
||||||
|
|
||||||
# test_config()
|
# test_config_context_moderation()
|
||||||
|
|
||||||
|
def test_config_context_default_fallback():
|
||||||
|
try:
|
||||||
|
messages=[{"role": "user", "content": "Hey, how's it going?"}]
|
||||||
|
response = completion_with_config(model="claude-instant-1", messages=messages, config=config, api_key="bad-key")
|
||||||
|
print(response)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"Exception: {e}")
|
||||||
|
pytest.fail(f"An exception occurred: {e}")
|
||||||
|
|
||||||
|
test_config_context_default_fallback()
|
|
@ -2795,6 +2795,10 @@ def completion_with_config(*args, config: Union[dict, str], **kwargs):
|
||||||
models_with_config = completion_config["model"].keys()
|
models_with_config = completion_config["model"].keys()
|
||||||
model = args[0] if len(args) > 0 else kwargs["model"]
|
model = args[0] if len(args) > 0 else kwargs["model"]
|
||||||
messages = args[1] if len(args) > 1 else kwargs["messages"]
|
messages = args[1] if len(args) > 1 else kwargs["messages"]
|
||||||
|
|
||||||
|
## Default fallback models
|
||||||
|
fallback_models = completion_config.get("default_fallback_models")
|
||||||
|
try:
|
||||||
if model in models_with_config:
|
if model in models_with_config:
|
||||||
## Moderation check
|
## Moderation check
|
||||||
if completion_config["model"][model].get("needs_moderation"):
|
if completion_config["model"][model].get("needs_moderation"):
|
||||||
|
@ -2804,7 +2808,7 @@ def completion_with_config(*args, config: Union[dict, str], **kwargs):
|
||||||
if flagged:
|
if flagged:
|
||||||
raise Exception("This response was flagged as inappropriate")
|
raise Exception("This response was flagged as inappropriate")
|
||||||
|
|
||||||
## Load Error Handling Logic
|
## Model-specific Error Handling
|
||||||
error_handling = None
|
error_handling = None
|
||||||
if completion_config["model"][model].get("error_handling"):
|
if completion_config["model"][model].get("error_handling"):
|
||||||
error_handling = completion_config["model"][model]["error_handling"]
|
error_handling = completion_config["model"][model]["error_handling"]
|
||||||
|
@ -2825,6 +2829,11 @@ def completion_with_config(*args, config: Union[dict, str], **kwargs):
|
||||||
raise e
|
raise e
|
||||||
else:
|
else:
|
||||||
return litellm.completion(*args, **kwargs)
|
return litellm.completion(*args, **kwargs)
|
||||||
|
except Exception as e:
|
||||||
|
if fallback_models:
|
||||||
|
model = fallback_models.pop(0)
|
||||||
|
return completion_with_fallbacks(model=model, messages=messages, fallbacks=fallback_models)
|
||||||
|
raise e
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -2925,7 +2934,6 @@ def completion_with_fallbacks(**kwargs):
|
||||||
if kwargs.get("model"):
|
if kwargs.get("model"):
|
||||||
del kwargs["model"]
|
del kwargs["model"]
|
||||||
|
|
||||||
print("making completion call", model)
|
|
||||||
response = litellm.completion(**kwargs, model=model)
|
response = litellm.completion(**kwargs, model=model)
|
||||||
|
|
||||||
if response != None:
|
if response != None:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue