fix: use provider-specific routing

Commit f1ffa82062 (parent 218ba0f470) · 4 changed files with 35 additions and 15 deletions

This change adds provider-specific wildcard entries ("anthropic/*", "groq/*") to the example proxy configs, and reorders the Router's fallback so the catch-all "*" default deployment is only tried after provider-specific wildcard matching fails. The end-to-end test now also routes an Anthropic model through the proxy.
Proxy config (`model_list`) — replace the catch-all `model: "*"` entry with provider-specific wildcard entries:

```diff
@@ -8,9 +8,15 @@ model_list:
     litellm_params:
       model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct
       api_key: "os.environ/FIREWORKS"
-  - model_name: "*"
+  # provider specific wildcard routing
+  - model_name: "anthropic/*"
     litellm_params:
-      model: "*"
+      model: "anthropic/*"
+      api_key: os.environ/ANTHROPIC_API_KEY
+  - model_name: "groq/*"
+    litellm_params:
+      model: "groq/*"
+      api_key: os.environ/GROQ_API_KEY
   - model_name: "*"
     litellm_params:
       model: openai/*
```
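With these entries, any model of the form `anthropic/<name>` or `groq/<name>` is routed with the matching provider key, while everything else still falls through to the `openai/*` catch-all. A minimal sketch of exercising the new routes through the proxy's OpenAI-compatible API; the base URL and key below are assumptions for illustration, not values from this diff:

```python
# Minimal sketch: call the proxy through the OpenAI-compatible client.
# base_url assumes the proxy's default local port; "sk-1234" is a placeholder key.
import openai

client = openai.OpenAI(base_url="http://0.0.0.0:4000", api_key="sk-1234")

# Matches the "anthropic/*" entry, so the proxy uses ANTHROPIC_API_KEY.
response = client.chat.completions.create(
    model="anthropic/claude-3-sonnet-20240229",
    messages=[{"role": "user", "content": "hello"}],
)
print(response.choices[0].message.content)
```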
`Router` — stop short-circuiting to the default deployment before wildcard matching:

```diff
@@ -4469,13 +4469,7 @@ class Router:
             )
             model = self.model_group_alias[model]
 
-        if model not in self.model_names and self.default_deployment is not None:
-            updated_deployment = copy.deepcopy(
-                self.default_deployment
-            )  # self.default_deployment
-            updated_deployment["litellm_params"]["model"] = model
-            return model, updated_deployment
-        elif model not in self.model_names:
+        if model not in self.model_names:
             # check if provider/ specific wildcard routing
             try:
                 (
```
`Router` — fall back to the default deployment only after the wildcard check:

```diff
@@ -4499,6 +4493,14 @@
                 # get_llm_provider raises exception when provider is unknown
                 pass
 
+            # check if default deployment is set
+            if self.default_deployment is not None:
+                updated_deployment = copy.deepcopy(
+                    self.default_deployment
+                )  # self.default_deployment
+                updated_deployment["litellm_params"]["model"] = model
+                return model, updated_deployment
+
         ## get healthy deployments
         ### get all deployments
         healthy_deployments = [m for m in self.model_list if m["model_name"] == model]
```
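These two hunks fix an ordering bug: previously, whenever a default `*` deployment was configured, any unknown model name was sent straight to it, so the provider-specific wildcard entries were never consulted. The new order tries wildcard matching first and uses the default deployment only as a last resort. A simplified paraphrase of the new lookup order; the names and provider parsing here are illustrative, not the Router's actual code:

```python
# Simplified paraphrase of the new resolution order: exact model names win,
# then provider wildcards such as "anthropic/*", then the catch-all default.
def resolve(model: str, model_names: set[str], wildcard_patterns: set[str],
            has_default: bool) -> str:
    if model in model_names:
        return "exact deployment"
    # provider-specific wildcard, e.g. "anthropic/claude-..." -> "anthropic/*"
    if "/" in model:
        provider = model.split("/", 1)[0]
        if f"{provider}/*" in wildcard_patterns:
            return "provider wildcard deployment"
    # only now fall back to the catch-all "*" deployment
    if has_default:
        return "default '*' deployment"
    raise ValueError(f"no deployment found for {model!r}")


assert resolve(
    "anthropic/claude-3-sonnet-20240229",
    model_names=set(),
    wildcard_patterns={"anthropic/*", "groq/*"},
    has_default=True,
) == "provider wildcard deployment"
```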
Second config (`model_list`) — the same substitution of the catch-all entry:

```diff
@@ -86,12 +86,16 @@ model_list:
       model: openai/*
       api_key: os.environ/OPENAI_API_KEY
 
-  # Pass through all llm requests to litellm.completion/litellm.embedding
-  # if user passes model="anthropic/claude-3-opus-20240229" proxy will make requests to anthropic claude-3-opus-20240229 using ANTHROPIC_API_KEY
-  - model_name: "*"
-    litellm_params:
-      model: "*"
+  # provider specific wildcard routing
+  - model_name: "anthropic/*"
+    litellm_params:
+      model: "anthropic/*"
+      api_key: os.environ/ANTHROPIC_API_KEY
+  - model_name: "groq/*"
+    litellm_params:
+      model: "groq/*"
+      api_key: os.environ/GROQ_API_KEY
 
   - model_name: mistral-embed
     litellm_params:
       model: mistral/mistral-embed
```
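The `api_key: os.environ/ANTHROPIC_API_KEY` values use litellm's convention of resolving secrets from environment variables rather than hard-coding keys in the config. A hedged sketch of that resolution pattern; this is illustrative, not litellm's actual implementation:

```python
# Illustrative resolver for the "os.environ/<VAR>" convention used in the
# config above; litellm's real implementation may differ.
import os


def resolve_secret(value: str) -> str:
    prefix = "os.environ/"
    if value.startswith(prefix):
        # Raises KeyError if the variable is unset, surfacing
        # misconfiguration early.
        return os.environ[value[len(prefix):]]
    return value


print(resolve_secret("os.environ/GROQ_API_KEY"))  # -> contents of $GROQ_API_KEY
```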
Test helper — surface the response body when a request fails:

```diff
@@ -119,7 +119,9 @@ async def chat_completion(session, key, model: Union[str, List] = "gpt-4"):
     print()
 
     if status != 200:
-        raise Exception(f"Request did not return a 200 status code: {status}")
+        raise Exception(
+            f"Request did not return a 200 status code: {status}, response text={response_text}"
+        )
 
     response_header_check(
         response
```
End-to-end test — also route an Anthropic model through the provider wildcard:

```diff
@@ -485,6 +487,12 @@ async def test_proxy_all_models():
             session=session, key=LITELLM_MASTER_KEY, model="groq/llama3-8b-8192"
         )
 
+        await chat_completion(
+            session=session,
+            key=LITELLM_MASTER_KEY,
+            model="anthropic/claude-3-sonnet-20240229",
+        )
+
 
 @pytest.mark.asyncio
 async def test_batch_chat_completions():
```
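The test helper posts to the proxy's OpenAI-compatible chat completions endpoint. A hedged sketch of the equivalent raw request, where the URL and key are assumptions rather than values from this diff:

```python
# Hedged sketch of the raw request behind the test's chat_completion helper.
import asyncio

import aiohttp


async def main() -> None:
    async with aiohttp.ClientSession() as session:
        async with session.post(
            "http://0.0.0.0:4000/chat/completions",  # assumed local proxy URL
            headers={"Authorization": "Bearer sk-1234"},  # assumed master key
            json={
                "model": "anthropic/claude-3-sonnet-20240229",
                "messages": [{"role": "user", "content": "hello"}],
            },
        ) as response:
            print(response.status, await response.text())


asyncio.run(main())
```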