diff --git a/litellm/__init__.py b/litellm/__init__.py index 1de60c761..510832a57 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -335,9 +335,9 @@ baseten_models: List = [ # used for token counting # Azure returns gpt-35-turbo in their responses, we need to map this to azure/gpt-3.5-turbo for token counting azure_llms = { - "gpt-35-turbo": "azure/gpt-3.5-turbo", - "gpt-35-turbo-16k": "azure/gpt-3.5-turbo-16k", - "gpt-35-turbo-instruct": "azure/gpt-3.5-turbo-instruct", + "gpt-35-turbo": "azure/gpt-35-turbo", + "gpt-35-turbo-16k": "azure/gpt-35-turbo-16k", + "gpt-35-turbo-instruct": "azure/gpt-35-turbo-instruct", } petals_models = [ diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index f3005759c..c39b1cda4 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -118,28 +118,56 @@ "litellm_provider": "azure", "mode": "chat" }, - "azure/gpt-4-32k": { + "azure/gpt-4-0613": { "max_tokens": 8192, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-4-32k-0613": { + "max_tokens": 32768, + "input_cost_per_token": 0.00006, + "output_cost_per_token": 0.00012, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-4-32k": { + "max_tokens": 32768, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "azure", "mode": "chat" }, "azure/gpt-4": { - "max_tokens": 16385, + "max_tokens": 8192, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, "litellm_provider": "azure", "mode": "chat" }, - "azure/gpt-3.5-turbo-16k": { + "azure/gpt-35-turbo-16k-0613": { "max_tokens": 16385, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, "litellm_provider": "azure", "mode": "chat" }, - "azure/gpt-3.5-turbo": { + "azure/gpt-35-turbo-1106": { "max_tokens": 16385, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + 
"litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-35-turbo-16k": { + "max_tokens": 16385, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000004, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-35-turbo": { "max_tokens": 4097, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002,