Merge pull request #3570 from BerriAI/litellm_test_model_openai_client

[Test] Proxy - uses the same OpenAI Client after 1 min

Commit db0db5c62c
2 changed files with 84 additions and 9 deletions
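The proxy change below adds an optional `model` filter and a `debug` flag to `/v2/model/info`; with `debug=True`, each returned deployment carries an `openai_client` field containing the stringified cached client. As a rough illustration (not part of the commit), a caller could poll the endpoint twice and compare that field to confirm the client is being reused; the helper name, base URL, API key, and wait interval below are placeholders chosen to mirror the merged test:

import asyncio
import aiohttp


async def openai_client_is_reused(base_url: str, api_key: str, model_name: str) -> bool:
    """Sketch: read /v2/model/info?debug=True twice and compare the reported
    openai_client string; identical strings mean the proxy reused its client."""
    url = f"{base_url}/v2/model/info?debug=True&model={model_name}"
    headers = {"Authorization": f"Bearer {api_key}"}
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as resp:
            first = (await resp.json())["data"][0]["openai_client"]
        await asyncio.sleep(30)  # roughly the interval the merged test uses
        async with session.get(url, headers=headers) as resp:
            second = (await resp.json())["data"][0]["openai_client"]
    return first == second


# Example (placeholder values):
# asyncio.run(openai_client_is_reused("http://0.0.0.0:4000", "sk-1234", "my-azure-model"))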
@@ -7795,11 +7795,15 @@ async def update_model(
 )
 async def model_info_v2(
     user_api_key_dict: UserAPIKeyAuth = Depends(user_api_key_auth),
+    model: Optional[str] = fastapi.Query(
+        None, description="Specify the model name (optional)"
+    ),
+    debug: Optional[bool] = False,
 ):
     """
     BETA ENDPOINT. Might change unexpectedly. Use `/v1/model/info` for now.
     """
-    global llm_model_list, general_settings, user_config_file_path, proxy_config
+    global llm_model_list, general_settings, user_config_file_path, proxy_config, llm_router

     if llm_model_list is None or not isinstance(llm_model_list, list):
         raise HTTPException(
@@ -7822,19 +7826,35 @@ async def model_info_v2(
     if len(user_api_key_dict.models) > 0:
         user_models = user_api_key_dict.models

+    if model is not None:
+        all_models = [m for m in all_models if m["model_name"] == model]
+
     # fill in model info based on config.yaml and litellm model_prices_and_context_window.json
-    for model in all_models:
+    for _model in all_models:
         # provided model_info in config.yaml
-        model_info = model.get("model_info", {})
+        model_info = _model.get("model_info", {})
+        if debug == True:
+            _openai_client = "None"
+            if llm_router is not None:
+                _openai_client = (
+                    llm_router._get_client(
+                        deployment=_model, kwargs={}, client_type="async"
+                    )
+                    or "None"
+                )
+            else:
+                _openai_client = "llm_router_is_None"
+            openai_client = str(_openai_client)
+            _model["openai_client"] = openai_client

         # read litellm model_prices_and_context_window.json to get the following:
         # input_cost_per_token, output_cost_per_token, max_tokens
-        litellm_model_info = get_litellm_model_info(model=model)
+        litellm_model_info = get_litellm_model_info(model=_model)

         # 2nd pass on the model, try seeing if we can find model in litellm model_cost map
         if litellm_model_info == {}:
             # use litellm_param model_name to get model_info
-            litellm_params = model.get("litellm_params", {})
+            litellm_params = _model.get("litellm_params", {})
             litellm_model = litellm_params.get("model", None)
             try:
                 litellm_model_info = litellm.get_model_info(model=litellm_model)
@@ -7843,7 +7863,7 @@ async def model_info_v2(
         # 3rd pass on the model, try seeing if we can find model but without the "/" in model cost map
         if litellm_model_info == {}:
             # use litellm_param model_name to get model_info
-            litellm_params = model.get("litellm_params", {})
+            litellm_params = _model.get("litellm_params", {})
             litellm_model = litellm_params.get("model", None)
             split_model = litellm_model.split("/")
             if len(split_model) > 0:
@@ -7855,10 +7875,10 @@ async def model_info_v2(
         for k, v in litellm_model_info.items():
             if k not in model_info:
                 model_info[k] = v
-        model["model_info"] = model_info
+        _model["model_info"] = model_info
         # don't return the api key / vertex credentials
-        model["litellm_params"].pop("api_key", None)
-        model["litellm_params"].pop("vertex_credentials", None)
+        _model["litellm_params"].pop("api_key", None)
+        _model["litellm_params"].pop("vertex_credentials", None)

     verbose_proxy_logger.debug("all_models: %s", all_models)
     return {"data": all_models}
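A note on why the string comparison in the new test is meaningful: the endpoint stores `str(_openai_client)`, and for objects that keep Python's default `__repr__` that string embeds the instance's memory address, so identical strings across requests imply the same live client object. This assumes the OpenAI client class does not override `__repr__`; a minimal illustration with a stand-in class:

class FakeClient:
    """Stand-in for an SDK client that keeps the default object repr."""


a = FakeClient()
b = FakeClient()

# The default repr looks like "<__main__.FakeClient object at 0x7f...>" and embeds
# id(obj): stable while the same instance is reused, different for a new instance.
assert str(a) == str(a)
assert str(a) != str(b)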
@@ -246,6 +246,33 @@ async def get_model_info_v2(session, key):
         raise Exception(f"Request did not return a 200 status code: {status}")


+async def get_specific_model_info_v2(session, key, model_name):
+    url = "http://0.0.0.0:4000/v2/model/info?debug=True&model=" + model_name
+    print("running /model/info check for model=", model_name)
+
+    headers = {
+        "Authorization": f"Bearer {key}",
+        "Content-Type": "application/json",
+    }
+
+    async with session.get(url, headers=headers) as response:
+        status = response.status
+        response_text = await response.text()
+        print("response from v2/model/info")
+        print(response_text)
+        print()
+
+        _json_response = await response.json()
+        print("JSON response from /v2/model/info?model=", model_name, _json_response)
+
+        _model_info = _json_response["data"]
+        assert len(_model_info) == 1, f"Expected 1 model, got {len(_model_info)}"
+
+        if status != 200:
+            raise Exception(f"Request did not return a 200 status code: {status}")
+        return _model_info[0]
+
+
 async def get_model_health(session, key, model_name):
     url = "http://0.0.0.0:4000/health?model=" + model_name
     headers = {
@@ -285,6 +312,11 @@ async def test_add_model_run_health():
         model_name = f"azure-model-health-check-{model_id}"
         print("adding model", model_name)
         await add_model_for_health_checking(session=session, model_id=model_id)
+        _old_model_info = await get_specific_model_info_v2(
+            session=session, key=key, model_name=model_name
+        )
+        print("model info before test", _old_model_info)
+
         await asyncio.sleep(30)
         print("calling /model/info")
         await get_model_info(session=session, key=key)
@@ -305,5 +337,28 @@ async def test_add_model_run_health():
             _healthy_endpooint["model"] == "azure/chatgpt-v-2"
         )  # this is the model that got added

+        # assert the httpx client is unchanged
+
+        await asyncio.sleep(10)
+
+        _model_info_after_test = await get_specific_model_info_v2(
+            session=session, key=key, model_name=model_name
+        )
+
+        print("model info after test", _model_info_after_test)
+        old_openai_client = _old_model_info["openai_client"]
+        new_openai_client = _model_info_after_test["openai_client"]
+        print("old openai client", old_openai_client)
+        print("new openai client", new_openai_client)
+
+        """
+        PROD TEST - This is extremely important
+        The OpenAI client used should be the same after 30 seconds
+        It is a serious bug if the openai client does not match here
+        """
+        assert (
+            old_openai_client == new_openai_client
+        ), "OpenAI client does not match for the same model after 30 seconds"
+
         # cleanup
         await delete_model(session=session, model_id=model_id)
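The regression the new assertion guards against is a router that rebuilds its OpenAI client on every lookup instead of returning the cached instance. The sketch below shows that reuse pattern in generic form; `ClientCache` is hypothetical and is not litellm's router implementation:

import time


class ClientCache:
    """Hypothetical per-deployment cache: return the same client object for
    repeat lookups inside the TTL, rebuild only after it expires."""

    def __init__(self, ttl_seconds: float = 3600.0):
        self.ttl = ttl_seconds
        self._clients: dict[str, tuple[object, float]] = {}

    def get_client(self, deployment_id: str, factory):
        now = time.monotonic()
        cached = self._clients.get(deployment_id)
        if cached is not None and now - cached[1] < self.ttl:
            return cached[0]  # reuse: str(client) stays identical across calls
        client = factory()  # rebuild only on a miss or after expiry
        self._clients[deployment_id] = (client, now)
        return client


cache = ClientCache()
first = cache.get_client("azure-model", object)
second = cache.get_client("azure-model", object)
assert first is second and str(first) == str(second)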