LiteLLM Minor Fixes & Improvements (10/09/2024) (#6139)

* fix(utils.py): don't return 'none' response headers

Fixes https://github.com/BerriAI/litellm/issues/6123

* fix(vertex_and_google_ai_studio_gemini.py): support parsing out additional properties and strict value for tool calls

Fixes https://github.com/BerriAI/litellm/issues/6136

* fix(cost_calculator.py): set default character value to None

Fixes https://github.com/BerriAI/litellm/issues/6133#issuecomment-2403290196

* fix(google.py): fix cost per token / cost per char conversion

Fixes https://github.com/BerriAI/litellm/issues/6133#issuecomment-2403370287

* build(model_prices_and_context_window.json): update gemini pricing

Fixes https://github.com/BerriAI/litellm/issues/6133

* build(model_prices_and_context_window.json): update gemini pricing

* fix(litellm_logging.py): fix streaming caching logging when 'turn_off_message_logging' enabled

Stores unredacted response in cache

* build(model_prices_and_context_window.json): update gemini-1.5-flash pricing

* fix(cost_calculator.py): fix default prompt_character count logic

Fixes error in gemini cost calculation

* fix(cost_calculator.py): fix cost calculation for TTS models
This commit is contained in:
Krish Dholakia 2024-10-10 00:42:11 -07:00 committed by GitHub
parent 60baa65e0e
commit 6005450c8f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 788 additions and 534 deletions

View file

@ -2106,20 +2106,20 @@
"max_tokens": 8192,
"max_input_tokens": 2097152,
"max_output_tokens": 8192,
"input_cost_per_image": 0.001315,
"input_cost_per_audio_per_second": 0.000125,
"input_cost_per_video_per_second": 0.001315,
"input_cost_per_token": 0.000005,
"input_cost_per_character": 0.00000125,
"input_cost_per_token_above_128k_tokens": 0.00001,
"input_cost_per_character_above_128k_tokens": 0.0000025,
"output_cost_per_token": 0.000015,
"output_cost_per_character": 0.00000375,
"output_cost_per_token_above_128k_tokens": 0.00003,
"output_cost_per_character_above_128k_tokens": 0.0000075,
"output_cost_per_image": 0.00263,
"output_cost_per_video_per_second": 0.00263,
"output_cost_per_audio_per_second": 0.00025,
"input_cost_per_image": 0.00032875,
"input_cost_per_audio_per_second": 0.00003125,
"input_cost_per_video_per_second": 0.00032875,
"input_cost_per_token": 0.000000078125,
"input_cost_per_character": 0.0000003125,
"input_cost_per_image_above_128k_tokens": 0.0006575,
"input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
"input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
"input_cost_per_token_above_128k_tokens": 0.00000015625,
"input_cost_per_character_above_128k_tokens": 0.000000625,
"output_cost_per_token": 0.0000003125,
"output_cost_per_character": 0.00000125,
"output_cost_per_token_above_128k_tokens": 0.000000625,
"output_cost_per_character_above_128k_tokens": 0.0000025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
@ -2132,20 +2132,20 @@
"max_tokens": 8192,
"max_input_tokens": 2097152,
"max_output_tokens": 8192,
"input_cost_per_image": 0.001315,
"input_cost_per_audio_per_second": 0.000125,
"input_cost_per_video_per_second": 0.001315,
"input_cost_per_token": 0.000005,
"input_cost_per_character": 0.00000125,
"input_cost_per_token_above_128k_tokens": 0.00001,
"input_cost_per_character_above_128k_tokens": 0.0000025,
"output_cost_per_token": 0.000015,
"output_cost_per_character": 0.00000375,
"output_cost_per_token_above_128k_tokens": 0.00003,
"output_cost_per_character_above_128k_tokens": 0.0000075,
"output_cost_per_image": 0.00263,
"output_cost_per_video_per_second": 0.00263,
"output_cost_per_audio_per_second": 0.00025,
"input_cost_per_image": 0.00032875,
"input_cost_per_audio_per_second": 0.00003125,
"input_cost_per_video_per_second": 0.00032875,
"input_cost_per_token": 0.000000078125,
"input_cost_per_character": 0.0000003125,
"input_cost_per_image_above_128k_tokens": 0.0006575,
"input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
"input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
"input_cost_per_token_above_128k_tokens": 0.00000015625,
"input_cost_per_character_above_128k_tokens": 0.000000625,
"output_cost_per_token": 0.0000003125,
"output_cost_per_character": 0.00000125,
"output_cost_per_token_above_128k_tokens": 0.000000625,
"output_cost_per_character_above_128k_tokens": 0.0000025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
@ -2158,20 +2158,20 @@
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"input_cost_per_image": 0.001315,
"input_cost_per_audio_per_second": 0.000125,
"input_cost_per_video_per_second": 0.001315,
"input_cost_per_token": 0.000005,
"input_cost_per_character": 0.00000125,
"input_cost_per_token_above_128k_tokens": 0.00001,
"input_cost_per_character_above_128k_tokens": 0.0000025,
"output_cost_per_token": 0.000015,
"output_cost_per_character": 0.00000375,
"output_cost_per_token_above_128k_tokens": 0.00003,
"output_cost_per_character_above_128k_tokens": 0.0000075,
"output_cost_per_image": 0.00263,
"output_cost_per_video_per_second": 0.00263,
"output_cost_per_audio_per_second": 0.00025,
"input_cost_per_image": 0.00032875,
"input_cost_per_audio_per_second": 0.00003125,
"input_cost_per_video_per_second": 0.00032875,
"input_cost_per_token": 0.000000078125,
"input_cost_per_character": 0.0000003125,
"input_cost_per_image_above_128k_tokens": 0.0006575,
"input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
"input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
"input_cost_per_token_above_128k_tokens": 0.00000015625,
"input_cost_per_character_above_128k_tokens": 0.000000625,
"output_cost_per_token": 0.0000003125,
"output_cost_per_character": 0.00000125,
"output_cost_per_token_above_128k_tokens": 0.000000625,
"output_cost_per_character_above_128k_tokens": 0.0000025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
@ -2184,20 +2184,20 @@
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"input_cost_per_image": 0.001315,
"input_cost_per_audio_per_second": 0.000125,
"input_cost_per_video_per_second": 0.001315,
"input_cost_per_token": 0.000005,
"input_cost_per_character": 0.00000125,
"input_cost_per_token_above_128k_tokens": 0.00001,
"input_cost_per_character_above_128k_tokens": 0.0000025,
"output_cost_per_token": 0.000015,
"output_cost_per_character": 0.00000375,
"output_cost_per_token_above_128k_tokens": 0.00003,
"output_cost_per_character_above_128k_tokens": 0.0000075,
"output_cost_per_image": 0.00263,
"output_cost_per_video_per_second": 0.00263,
"output_cost_per_audio_per_second": 0.00025,
"input_cost_per_image": 0.00032875,
"input_cost_per_audio_per_second": 0.00003125,
"input_cost_per_video_per_second": 0.00032875,
"input_cost_per_token": 0.000000078125,
"input_cost_per_character": 0.0000003125,
"input_cost_per_image_above_128k_tokens": 0.0006575,
"input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
"input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
"input_cost_per_token_above_128k_tokens": 0.00000015625,
"input_cost_per_character_above_128k_tokens": 0.000000625,
"output_cost_per_token": 0.0000003125,
"output_cost_per_character": 0.00000125,
"output_cost_per_token_above_128k_tokens": 0.000000625,
"output_cost_per_character_above_128k_tokens": 0.0000025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
@ -2210,20 +2210,20 @@
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"input_cost_per_image": 0.001315,
"input_cost_per_audio_per_second": 0.000125,
"input_cost_per_video_per_second": 0.001315,
"input_cost_per_token": 0.000005,
"input_cost_per_character": 0.00000125,
"input_cost_per_token_above_128k_tokens": 0.00001,
"input_cost_per_character_above_128k_tokens": 0.0000025,
"output_cost_per_token": 0.000015,
"output_cost_per_character": 0.00000375,
"output_cost_per_token_above_128k_tokens": 0.00003,
"output_cost_per_character_above_128k_tokens": 0.0000075,
"output_cost_per_image": 0.00263,
"output_cost_per_video_per_second": 0.00263,
"output_cost_per_audio_per_second": 0.00025,
"input_cost_per_image": 0.00032875,
"input_cost_per_audio_per_second": 0.00003125,
"input_cost_per_video_per_second": 0.00032875,
"input_cost_per_token": 0.000000078125,
"input_cost_per_character": 0.0000003125,
"input_cost_per_image_above_128k_tokens": 0.0006575,
"input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
"input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
"input_cost_per_token_above_128k_tokens": 0.00000015625,
"input_cost_per_character_above_128k_tokens": 0.000000625,
"output_cost_per_token": 0.0000003125,
"output_cost_per_character": 0.00000125,
"output_cost_per_token_above_128k_tokens": 0.000000625,
"output_cost_per_character_above_128k_tokens": 0.0000025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
@ -2236,20 +2236,20 @@
"max_tokens": 8192,
"max_input_tokens": 1000000,
"max_output_tokens": 8192,
"input_cost_per_image": 0.001315,
"input_cost_per_audio_per_second": 0.000125,
"input_cost_per_video_per_second": 0.001315,
"input_cost_per_token": 0.000005,
"input_cost_per_character": 0.00000125,
"input_cost_per_token_above_128k_tokens": 0.00001,
"input_cost_per_character_above_128k_tokens": 0.0000025,
"output_cost_per_token": 0.000015,
"output_cost_per_character": 0.00000375,
"output_cost_per_token_above_128k_tokens": 0.00003,
"output_cost_per_character_above_128k_tokens": 0.0000075,
"output_cost_per_image": 0.00263,
"output_cost_per_video_per_second": 0.00263,
"output_cost_per_audio_per_second": 0.00025,
"input_cost_per_image": 0.00032875,
"input_cost_per_audio_per_second": 0.00003125,
"input_cost_per_video_per_second": 0.00032875,
"input_cost_per_token": 0.000000078125,
"input_cost_per_character": 0.0000003125,
"input_cost_per_image_above_128k_tokens": 0.0006575,
"input_cost_per_video_per_second_above_128k_tokens": 0.0006575,
"input_cost_per_audio_per_second_above_128k_tokens": 0.0000625,
"input_cost_per_token_above_128k_tokens": 0.00000015625,
"input_cost_per_character_above_128k_tokens": 0.000000625,
"output_cost_per_token": 0.0000003125,
"output_cost_per_character": 0.00000125,
"output_cost_per_token_above_128k_tokens": 0.000000625,
"output_cost_per_character_above_128k_tokens": 0.0000025,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_function_calling": true,
@ -2267,20 +2267,20 @@
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0.0001315,
"input_cost_per_video_per_second": 0.0001315,
"input_cost_per_audio_per_second": 0.000125,
"input_cost_per_token": 0.0000005,
"input_cost_per_character": 0.000000125,
"input_cost_per_image": 0.00002,
"input_cost_per_video_per_second": 0.00002,
"input_cost_per_audio_per_second": 0.000002,
"input_cost_per_token": 0.000000004688,
"input_cost_per_character": 0.00000001875,
"input_cost_per_token_above_128k_tokens": 0.000001,
"input_cost_per_character_above_128k_tokens": 0.00000025,
"output_cost_per_token": 0.0000015,
"output_cost_per_character": 0.000000375,
"output_cost_per_token_above_128k_tokens": 0.000003,
"output_cost_per_character_above_128k_tokens": 0.00000075,
"output_cost_per_image": 0.000263,
"output_cost_per_video_per_second": 0.000263,
"output_cost_per_audio_per_second": 0.00025,
"input_cost_per_image_above_128k_tokens": 0.00004,
"input_cost_per_video_per_second_above_128k_tokens": 0.00004,
"input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
"output_cost_per_token": 0.0000000046875,
"output_cost_per_character": 0.00000001875,
"output_cost_per_token_above_128k_tokens": 0.000000009375,
"output_cost_per_character_above_128k_tokens": 0.0000000375,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
@ -2299,20 +2299,20 @@
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0.0001315,
"input_cost_per_video_per_second": 0.0001315,
"input_cost_per_audio_per_second": 0.000125,
"input_cost_per_token": 0.0000005,
"input_cost_per_character": 0.000000125,
"input_cost_per_image": 0.00002,
"input_cost_per_video_per_second": 0.00002,
"input_cost_per_audio_per_second": 0.000002,
"input_cost_per_token": 0.000000004688,
"input_cost_per_character": 0.00000001875,
"input_cost_per_token_above_128k_tokens": 0.000001,
"input_cost_per_character_above_128k_tokens": 0.00000025,
"output_cost_per_token": 0.0000015,
"output_cost_per_character": 0.000000375,
"output_cost_per_token_above_128k_tokens": 0.000003,
"output_cost_per_character_above_128k_tokens": 0.00000075,
"output_cost_per_image": 0.000263,
"output_cost_per_video_per_second": 0.000263,
"output_cost_per_audio_per_second": 0.00025,
"input_cost_per_image_above_128k_tokens": 0.00004,
"input_cost_per_video_per_second_above_128k_tokens": 0.00004,
"input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
"output_cost_per_token": 0.0000000046875,
"output_cost_per_character": 0.00000001875,
"output_cost_per_token_above_128k_tokens": 0.000000009375,
"output_cost_per_character_above_128k_tokens": 0.0000000375,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
@ -2331,20 +2331,20 @@
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0.0001315,
"input_cost_per_video_per_second": 0.0001315,
"input_cost_per_audio_per_second": 0.000125,
"input_cost_per_token": 0.0000005,
"input_cost_per_character": 0.000000125,
"input_cost_per_image": 0.00002,
"input_cost_per_video_per_second": 0.00002,
"input_cost_per_audio_per_second": 0.000002,
"input_cost_per_token": 0.000000004688,
"input_cost_per_character": 0.00000001875,
"input_cost_per_token_above_128k_tokens": 0.000001,
"input_cost_per_character_above_128k_tokens": 0.00000025,
"output_cost_per_token": 0.0000015,
"output_cost_per_character": 0.000000375,
"output_cost_per_token_above_128k_tokens": 0.000003,
"output_cost_per_character_above_128k_tokens": 0.00000075,
"output_cost_per_image": 0.000263,
"output_cost_per_video_per_second": 0.000263,
"output_cost_per_audio_per_second": 0.00025,
"input_cost_per_image_above_128k_tokens": 0.00004,
"input_cost_per_video_per_second_above_128k_tokens": 0.00004,
"input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
"output_cost_per_token": 0.0000000046875,
"output_cost_per_character": 0.00000001875,
"output_cost_per_token_above_128k_tokens": 0.000000009375,
"output_cost_per_character_above_128k_tokens": 0.0000000375,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
@ -2363,20 +2363,20 @@
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0.0001315,
"input_cost_per_video_per_second": 0.0001315,
"input_cost_per_audio_per_second": 0.000125,
"input_cost_per_token": 0.0000005,
"input_cost_per_character": 0.000000125,
"input_cost_per_image": 0.00002,
"input_cost_per_video_per_second": 0.00002,
"input_cost_per_audio_per_second": 0.000002,
"input_cost_per_token": 0.000000004688,
"input_cost_per_character": 0.00000001875,
"input_cost_per_token_above_128k_tokens": 0.000001,
"input_cost_per_character_above_128k_tokens": 0.00000025,
"output_cost_per_token": 0.0000015,
"output_cost_per_character": 0.000000375,
"output_cost_per_token_above_128k_tokens": 0.000003,
"output_cost_per_character_above_128k_tokens": 0.00000075,
"output_cost_per_image": 0.000263,
"output_cost_per_video_per_second": 0.000263,
"output_cost_per_audio_per_second": 0.00025,
"input_cost_per_image_above_128k_tokens": 0.00004,
"input_cost_per_video_per_second_above_128k_tokens": 0.00004,
"input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
"output_cost_per_token": 0.0000000046875,
"output_cost_per_character": 0.00000001875,
"output_cost_per_token_above_128k_tokens": 0.000000009375,
"output_cost_per_character_above_128k_tokens": 0.0000000375,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,
@ -2395,20 +2395,20 @@
"max_audio_length_hours": 8.4,
"max_audio_per_prompt": 1,
"max_pdf_size_mb": 30,
"input_cost_per_image": 0.0001315,
"input_cost_per_video_per_second": 0.0001315,
"input_cost_per_audio_per_second": 0.000125,
"input_cost_per_token": 0.0000005,
"input_cost_per_character": 0.000000125,
"input_cost_per_image": 0.00002,
"input_cost_per_video_per_second": 0.00002,
"input_cost_per_audio_per_second": 0.000002,
"input_cost_per_token": 0.000000004688,
"input_cost_per_character": 0.00000001875,
"input_cost_per_token_above_128k_tokens": 0.000001,
"input_cost_per_character_above_128k_tokens": 0.00000025,
"output_cost_per_token": 0.0000015,
"output_cost_per_character": 0.000000375,
"output_cost_per_token_above_128k_tokens": 0.000003,
"output_cost_per_character_above_128k_tokens": 0.00000075,
"output_cost_per_image": 0.000263,
"output_cost_per_video_per_second": 0.000263,
"output_cost_per_audio_per_second": 0.00025,
"input_cost_per_image_above_128k_tokens": 0.00004,
"input_cost_per_video_per_second_above_128k_tokens": 0.00004,
"input_cost_per_audio_per_second_above_128k_tokens": 0.000004,
"output_cost_per_token": 0.0000000046875,
"output_cost_per_character": 0.00000001875,
"output_cost_per_token_above_128k_tokens": 0.000000009375,
"output_cost_per_character_above_128k_tokens": 0.0000000375,
"litellm_provider": "vertex_ai-language-models",
"mode": "chat",
"supports_system_messages": true,