diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 690c24da4..c9ba4c215 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -1093,8 +1093,11 @@ async def model_info(request: Request):
         data = copy.deepcopy(model["litellm_params"])
         data["messages"] = [{"role": "user", "content": "Hey, how's it going?"}]
         data["max_tokens"] = 10
+        print(f"data going to litellm acompletion: {data}")
         response = await litellm.acompletion(**data)
-        litellm_model_info = litellm.model_cost.get(response["model"], {})
+        response_model = response["model"]
+        print(f"response model: {response_model}; response - {response}")
+        litellm_model_info = litellm.get_model_info(response_model)
         model_info = model.get("model_info", {})
         for k, v in litellm_model_info.items():
             if k not in model_info:
diff --git a/litellm/utils.py b/litellm/utils.py
index bed6d1cda..280a6342f 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2703,6 +2703,13 @@ def get_model_info(model: str):
     except requests.exceptions.RequestException as e:
         return None
     try:
+        azure_llms = {
+            "gpt-35-turbo": "azure/gpt-3.5-turbo",
+            "gpt-35-turbo-16k": "azure/gpt-3.5-turbo-16k",
+            "gpt-35-turbo-instruct": "azure/gpt-3.5-turbo-instruct"
+        }
+        if model in azure_llms:
+            model = azure_llms[model]
        if model in litellm.model_cost:
            return litellm.model_cost[model]
        model, custom_llm_provider, _, _ = get_llm_provider(model=model)
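
For reference, a minimal standalone sketch of the alias lookup this patch adds to get_model_info. The resolve_model_key helper and the model_cost contents shown here are hypothetical, for illustration only; the patch itself performs the same remapping inline before consulting litellm.model_cost.

# Illustrative sketch (not part of the patch): how the alias lookup resolves an
# Azure deployment-style name to a model_cost key. The helper name and the
# model_cost contents below are assumptions for demonstration only.
azure_llms = {
    "gpt-35-turbo": "azure/gpt-3.5-turbo",
    "gpt-35-turbo-16k": "azure/gpt-3.5-turbo-16k",
    "gpt-35-turbo-instruct": "azure/gpt-3.5-turbo-instruct",
}

def resolve_model_key(model, model_cost):
    """Map an Azure deployment name to its canonical model_cost entry, if any."""
    model = azure_llms.get(model, model)  # e.g. "gpt-35-turbo" -> "azure/gpt-3.5-turbo"
    return model_cost.get(model)

# Example: Azure responses report "gpt-35-turbo", which has no direct entry in
# model_cost, but the alias maps it to "azure/gpt-3.5-turbo".
example_model_cost = {"azure/gpt-3.5-turbo": {"max_tokens": 4096}}
print(resolve_model_key("gpt-35-turbo", example_model_cost))  # -> {"max_tokens": 4096}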