forked from phoenix/litellm-mirror
fix(proxy_server.py): support model info augmenting for azure models
This commit is contained in:
parent
ecddb852a2
commit
add4dfc528
2 changed files with 11 additions and 1 deletion
|
@@ -1093,8 +1093,11 @@ async def model_info(request: Request):
|
|||
data = copy.deepcopy(model["litellm_params"])
|
||||
data["messages"] = [{"role": "user", "content": "Hey, how's it going?"}]
|
||||
data["max_tokens"] = 10
|
||||
print(f"data going to litellm acompletion: {data}")
|
||||
response = await litellm.acompletion(**data)
|
||||
litellm_model_info = litellm.model_cost.get(response["model"], {})
|
||||
response_model = response["model"]
|
||||
print(f"response model: {response_model}; response - {response}")
|
||||
litellm_model_info = litellm.get_model_info(response_model)
|
||||
model_info = model.get("model_info", {})
|
||||
for k, v in litellm_model_info.items():
|
||||
if k not in model_info:
|
||||
|
|
|
@@ -2703,6 +2703,13 @@ def get_model_info(model: str):
|
|||
except requests.exceptions.RequestException as e:
|
||||
return None
|
||||
try:
|
||||
azure_llms = {
|
||||
"gpt-35-turbo": "azure/gpt-3.5-turbo",
|
||||
"gpt-35-turbo-16k": "azure/gpt-3.5-turbo-16k",
|
||||
"gpt-35-turbo-instruct": "azure/gpt-3.5-turbo-instruct"
|
||||
}
|
||||
if model in azure_llms:
|
||||
model = azure_llms[model]
|
||||
if model in litellm.model_cost:
|
||||
return litellm.model_cost[model]
|
||||
model, custom_llm_provider, _, _ = get_llm_provider(model=model)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue