diff --git a/.circleci/config.yml b/.circleci/config.yml
index c1224159a..9a29ed07c 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -80,7 +80,7 @@ jobs:
           command: |
             pwd
             ls
-            python -m pytest -vv litellm/tests/ -x --junitxml=test-results/junit.xml --durations=5
+            python -m pytest -vv -s litellm/tests/ -x --junitxml=test-results/junit.xml --durations=5
           no_output_timeout: 120m
 
   # Store test results
diff --git a/.gitignore b/.gitignore
index 730898a5b..00cd35c5b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,3 +43,4 @@ ui/litellm-dashboard/package-lock.json
 deploy/charts/litellm-helm/*.tgz
 deploy/charts/litellm-helm/charts/*
 deploy/charts/*.tgz
+litellm/proxy/vertex_key.json
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8ab4e3e92..8978e0d1a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -10,6 +10,12 @@ repos:
         exclude: ^litellm/tests/|^litellm/proxy/proxy_cli.py|^litellm/integrations/|^litellm/proxy/tests/
         additional_dependencies: [flake8-print]
         files: litellm/.*\.py
+- repo: local
+  hooks:
+  - id: check-files-match
+    name: Check if files match
+    entry: python3 ci_cd/check_files_match.py
+    language: system
 - repo: local
   hooks:
   - id: mypy
diff --git a/ci_cd/check_files_match.py b/ci_cd/check_files_match.py
new file mode 100644
index 000000000..18b6cf792
--- /dev/null
+++ b/ci_cd/check_files_match.py
@@ -0,0 +1,32 @@
+import sys
+import filecmp
+import shutil
+
+
+def main(argv=None):
+    print(
+        "Comparing model_prices_and_context_window.json and litellm/model_prices_and_context_window_backup.json files... checking if they match."
+    )
+
+    file1 = "model_prices_and_context_window.json"
+    file2 = "litellm/model_prices_and_context_window_backup.json"
+
+    cmp_result = filecmp.cmp(file1, file2, shallow=False)
+
+    if cmp_result:
+        print(f"Passed! Files {file1} and {file2} match.")
+        return 0
+    else:
+        print(
+            f"Failed! Files {file1} and {file2} do not match. Copying content from {file1} to {file2}."
+        )
+        copy_content(file1, file2)
+        return 1
+
+
+def copy_content(source, destination):
+    shutil.copy2(source, destination)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/docs/my-website/docs/observability/langfuse_integration.md b/docs/my-website/docs/observability/langfuse_integration.md
index 3de426ec3..ec8f3a029 100644
--- a/docs/my-website/docs/observability/langfuse_integration.md
+++ b/docs/my-website/docs/observability/langfuse_integration.md
@@ -122,6 +122,7 @@ response = completion(
         "generation_id": "gen-id22",   # set langfuse Generation ID
         "trace_id": "trace-id22",      # set langfuse Trace ID
         "trace_user_id": "user-id2",   # set langfuse Trace User ID
+        "session_id": "session-1",     # set langfuse Session ID
     },
 )
diff --git a/docs/my-website/docs/proxy/virtual_keys.md b/docs/my-website/docs/proxy/virtual_keys.md
index dd5edc6da..c51bfc0ac 100644
--- a/docs/my-website/docs/proxy/virtual_keys.md
+++ b/docs/my-website/docs/proxy/virtual_keys.md
@@ -352,6 +352,22 @@ Request Params:
 }
 ```
 
+## Upperbound /key/generate params
+Use this if you need to control the upper bound that users can set for `max_budget`, `budget_duration`, or any other `key/generate` param per key.
+
+Set `litellm_settings:upperbound_key_generate_params`:
+```yaml
+litellm_settings:
+  upperbound_key_generate_params:
+    max_budget: 100 # upperbound of $100, for all /key/generate requests
+    duration: "30d" # upperbound of 30 days for all /key/generate requests
+```
+
+**Expected Behavior**
+
+- Send a `/key/generate` request with `max_budget=200`
+- The key will be created with `max_budget=100`, since 100 is the upper bound
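+
+For example, a request that exceeds the bound (a sketch; the request shape follows the other `/key/generate` examples in these docs, and the local proxy URL and master key value are assumptions):
+
+```shell
+curl --location 'http://0.0.0.0:8000/key/generate' \
+--header 'Authorization: Bearer sk-1234' \
+--header 'Content-Type: application/json' \
+--data '{"max_budget": 200}'
+```
+
+The returned key is created with `max_budget: 100`, the configured upper bound.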
+
 ## Default /key/generate params
 Use this, if you need to control the default `max_budget` or any `key/generate` param per key.
diff --git a/litellm/__init__.py b/litellm/__init__.py
index 3f2a1e4b4..26b761c64 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -146,6 +146,7 @@ suppress_debug_info = False
 dynamodb_table_name: Optional[str] = None
 s3_callback_params: Optional[Dict] = None
 default_key_generate_params: Optional[Dict] = None
+upperbound_key_generate_params: Optional[Dict] = None
 default_team_settings: Optional[List] = None
 #### RELIABILITY ####
 request_timeout: Optional[float] = 6000
diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py
index cd37a93a3..82de33366 100644
--- a/litellm/integrations/langfuse.py
+++ b/litellm/integrations/langfuse.py
@@ -55,8 +55,21 @@ class LangFuseLogger:
         else:
             self.upstream_langfuse = None
 
+    # def log_error(kwargs, response_obj, start_time, end_time):
+    #     generation = trace.generation(
+    #         level="ERROR",  # can be any of DEBUG, DEFAULT, WARNING or ERROR
+    #         status_message="error",  # can be any string (e.g. stringified stack trace or error body)
+    #     )
     def log_event(
-        self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
+        self,
+        kwargs,
+        response_obj,
+        start_time,
+        end_time,
+        user_id,
+        print_verbose,
+        level="DEFAULT",
+        status_message=None,
     ):
         # Method definition
@@ -84,37 +97,49 @@ class LangFuseLogger:
             pass
 
         # end of processing langfuse ########################
-        if kwargs.get("call_type", None) == "embedding" or isinstance(
-            response_obj, litellm.EmbeddingResponse
+        if (
+            level == "ERROR"
+            and status_message is not None
+            and isinstance(status_message, str)
+        ):
+            input = prompt
+            output = status_message
+        elif response_obj is not None and (
+            kwargs.get("call_type", None) == "embedding"
+            or isinstance(response_obj, litellm.EmbeddingResponse)
         ):
             input = prompt
             output = response_obj["data"]
-        else:
+        elif response_obj is not None:
             input = prompt
             output = response_obj["choices"][0]["message"].json()
-        print_verbose(f"OUTPUT IN LANGFUSE: {output}; original: {response_obj}")
-        self._log_langfuse_v2(
-            user_id,
-            metadata,
-            output,
-            start_time,
-            end_time,
-            kwargs,
-            optional_params,
-            input,
-            response_obj,
-            print_verbose,
-        ) if self._is_langfuse_v2() else self._log_langfuse_v1(
-            user_id,
-            metadata,
-            output,
-            start_time,
-            end_time,
-            kwargs,
-            optional_params,
-            input,
-            response_obj,
-        )
+        print(f"OUTPUT IN LANGFUSE: {output}; original: {response_obj}")
+        if self._is_langfuse_v2():
+            self._log_langfuse_v2(
+                user_id,
+                metadata,
+                output,
+                start_time,
+                end_time,
+                kwargs,
+                optional_params,
+                input,
+                response_obj,
+                level,
+                print_verbose,
+            )
+        elif response_obj is not None:
+            self._log_langfuse_v1(
+                user_id,
+                metadata,
+                output,
+                start_time,
+                end_time,
+                kwargs,
+                optional_params,
+                input,
+                response_obj,
+            )
 
         self.Langfuse.flush()
         print_verbose(
@@ -123,15 +148,15 @@
             verbose_logger.info(f"Langfuse Layer Logging - logging success")
         except:
             traceback.print_exc()
-            print_verbose(f"Langfuse Layer Error - {traceback.format_exc()}")
{traceback.format_exc()}") + print(f"Langfuse Layer Error - {traceback.format_exc()}") pass async def _async_log_event( self, kwargs, response_obj, start_time, end_time, user_id, print_verbose ): - self.log_event( - kwargs, response_obj, start_time, end_time, user_id, print_verbose - ) + """ + TODO: support async calls when langfuse is truly async + """ def _is_langfuse_v2(self): import langfuse @@ -193,56 +218,78 @@ class LangFuseLogger: optional_params, input, response_obj, + level, print_verbose, ): import langfuse - tags = [] - supports_tags = Version(langfuse.version.__version__) >= Version("2.6.3") - supports_costs = Version(langfuse.version.__version__) >= Version("2.7.3") + try: + tags = [] + supports_tags = Version(langfuse.version.__version__) >= Version("2.6.3") + supports_costs = Version(langfuse.version.__version__) >= Version("2.7.3") - print_verbose(f"Langfuse Layer Logging - logging to langfuse v2 ") + print_verbose(f"Langfuse Layer Logging - logging to langfuse v2 ") - generation_name = metadata.get("generation_name", None) - if generation_name is None: - # just log `litellm-{call_type}` as the generation name - generation_name = f"litellm-{kwargs.get('call_type', 'completion')}" + generation_name = metadata.get("generation_name", None) + if generation_name is None: + # just log `litellm-{call_type}` as the generation name + generation_name = f"litellm-{kwargs.get('call_type', 'completion')}" - trace_params = { - "name": generation_name, - "input": input, - "output": output, - "user_id": metadata.get("trace_user_id", user_id), - "id": metadata.get("trace_id", None), - } - cost = kwargs["response_cost"] - print_verbose(f"trace: {cost}") - if supports_tags: - for key, value in metadata.items(): - tags.append(f"{key}:{value}") - if "cache_hit" in kwargs: - tags.append(f"cache_hit:{kwargs['cache_hit']}") - trace_params.update({"tags": tags}) + trace_params = { + "name": generation_name, + "input": input, + "user_id": metadata.get("trace_user_id", user_id), + "id": metadata.get("trace_id", None), + "session_id": metadata.get("session_id", None), + } - trace = self.Langfuse.trace(**trace_params) + if level == "ERROR": + trace_params["status_message"] = output + else: + trace_params["output"] = output - # get generation_id - generation_id = None - if response_obj.get("id", None) is not None: - generation_id = litellm.utils.get_logging_id(start_time, response_obj) - trace.generation( - name=generation_name, - id=metadata.get("generation_id", generation_id), - startTime=start_time, - endTime=end_time, - model=kwargs["model"], - modelParameters=optional_params, - input=input, - output=output, - usage={ - "prompt_tokens": response_obj["usage"]["prompt_tokens"], - "completion_tokens": response_obj["usage"]["completion_tokens"], - "total_cost": cost if supports_costs else None, - }, - metadata=metadata, - ) + cost = kwargs.get("response_cost", None) + print_verbose(f"trace: {cost}") + if supports_tags: + for key, value in metadata.items(): + tags.append(f"{key}:{value}") + if "cache_hit" in kwargs: + tags.append(f"cache_hit:{kwargs['cache_hit']}") + trace_params.update({"tags": tags}) + + trace = self.Langfuse.trace(**trace_params) + + if level == "ERROR": + trace.generation( + level="ERROR", # can be any of DEBUG, DEFAULT, WARNING or ERROR + status_message=output, # can be any string (e.g. 
+                    status_message=output,  # can be any string (e.g. stringified stack trace or error body)
+                )
+                print(f"SUCCESSFULLY LOGGED ERROR")
+            else:
+                # get generation_id
+                generation_id = None
+                if (
+                    response_obj is not None
+                    and response_obj.get("id", None) is not None
+                ):
+                    generation_id = litellm.utils.get_logging_id(
+                        start_time, response_obj
+                    )
+                trace.generation(
+                    name=generation_name,
+                    id=metadata.get("generation_id", generation_id),
+                    startTime=start_time,
+                    endTime=end_time,
+                    model=kwargs["model"],
+                    modelParameters=optional_params,
+                    input=input,
+                    output=output,
+                    usage={
+                        "prompt_tokens": response_obj["usage"]["prompt_tokens"],
+                        "completion_tokens": response_obj["usage"]["completion_tokens"],
+                        "total_cost": cost if supports_costs else None,
+                    },
+                    metadata=metadata,
+                )
+        except Exception as e:
+            print(f"Langfuse Layer Error - {traceback.format_exc()}")
diff --git a/litellm/llms/ollama.py b/litellm/llms/ollama.py
index d0bc24af4..9339deb78 100644
--- a/litellm/llms/ollama.py
+++ b/litellm/llms/ollama.py
@@ -146,7 +146,15 @@ def get_ollama_response(
             optional_params[k] = v
 
     stream = optional_params.pop("stream", False)
-    data = {"model": model, "prompt": prompt, "options": optional_params}
+    format = optional_params.pop("format", None)
+    data = {
+        "model": model,
+        "prompt": prompt,
+        "options": optional_params,
+        "stream": stream,
+    }
+    if format is not None:
+        data["format"] = format
 
     ## LOGGING
     logging_obj.pre_call(
diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py
index 95ff8dfaa..0311931b1 100644
--- a/litellm/llms/ollama_chat.py
+++ b/litellm/llms/ollama_chat.py
@@ -146,7 +146,15 @@
             optional_params[k] = v
 
     stream = optional_params.pop("stream", False)
-    data = {"model": model, "messages": messages, "options": optional_params}
+    format = optional_params.pop("format", None)
+    data = {
+        "model": model,
+        "messages": messages,
+        "options": optional_params,
+        "stream": stream,
+    }
+    if format is not None:
+        data["format"] = format
 
     ## LOGGING
     logging_obj.pre_call(
         input=None,
@@ -320,11 +328,15 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
             model_response["choices"][0]["message"] = message
         else:
             model_response["choices"][0]["message"] = response_json["message"]
+
         model_response["created"] = int(time.time())
-        model_response["model"] = "ollama/" + data["model"]
+        model_response["model"] = "ollama_chat/" + data["model"]
         prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=data["messages"]))  # type: ignore
         completion_tokens = response_json.get(
-            "eval_count", litellm.token_counter(text=response_json["message"])
+            "eval_count",
+            litellm.token_counter(
+                text=response_json["message"]["content"], count_response_tokens=True
+            ),
         )
         model_response["usage"] = litellm.Usage(
             prompt_tokens=prompt_tokens,
diff --git a/litellm/main.py b/litellm/main.py
index bc33a69e5..384dadc32 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -263,6 +263,7 @@ async def acompletion(
             or custom_llm_provider == "ollama"
             or custom_llm_provider == "ollama_chat"
             or custom_llm_provider == "vertex_ai"
+            or custom_llm_provider in litellm.openai_compatible_providers
         ):  # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all.
             init_response = await loop.run_in_executor(None, func_with_context)
             if isinstance(init_response, dict) or isinstance(
@@ -3319,6 +3320,10 @@ async def ahealth_check(
             response = {}  # args like remaining ratelimit etc.
return response except Exception as e: + if model not in litellm.model_cost and mode is None: + raise Exception( + "Missing `mode`. Set the `mode` for the model - https://docs.litellm.ai/docs/proxy/health#embedding-models" + ) return {"error": str(e)} diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 454b2504a..4c28bdbe8 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -1,6 +1,8 @@ { "gpt-4": { - "max_tokens": 8192, + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, "litellm_provider": "openai", @@ -8,6 +10,8 @@ }, "gpt-4-0314": { "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, "litellm_provider": "openai", @@ -15,6 +19,8 @@ }, "gpt-4-0613": { "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, "litellm_provider": "openai", @@ -22,6 +28,8 @@ }, "gpt-4-32k": { "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "openai", @@ -29,6 +37,8 @@ }, "gpt-4-32k-0314": { "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "openai", @@ -36,6 +46,8 @@ }, "gpt-4-32k-0613": { "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "openai", @@ -43,6 +55,17 @@ }, "gpt-4-1106-preview": { "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "openai", + "mode": "chat" + }, + "gpt-4-0125-preview": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, "litellm_provider": "openai", @@ -50,6 +73,17 @@ }, "gpt-4-vision-preview": { "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "openai", + "mode": "chat" + }, + "gpt-4-1106-vision-preview": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, "litellm_provider": "openai", @@ -57,6 +91,8 @@ }, "gpt-3.5-turbo": { "max_tokens": 4097, + "max_input_tokens": 4097, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, "litellm_provider": "openai", @@ -64,6 +100,8 @@ }, "gpt-3.5-turbo-0301": { "max_tokens": 4097, + "max_input_tokens": 4097, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, "litellm_provider": "openai", @@ -71,6 +109,8 @@ }, "gpt-3.5-turbo-0613": { "max_tokens": 4097, + "max_input_tokens": 4097, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, "litellm_provider": "openai", @@ -78,13 +118,26 @@ }, "gpt-3.5-turbo-1106": { "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, "input_cost_per_token": 
0.0000010, "output_cost_per_token": 0.0000020, "litellm_provider": "openai", "mode": "chat" }, + "gpt-3.5-turbo-0125": { + "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000015, + "litellm_provider": "openai", + "mode": "chat" + }, "gpt-3.5-turbo-16k": { "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, "litellm_provider": "openai", @@ -92,6 +145,8 @@ }, "gpt-3.5-turbo-16k-0613": { "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, "litellm_provider": "openai", @@ -99,11 +154,27 @@ }, "ft:gpt-3.5-turbo": { "max_tokens": 4097, - "input_cost_per_token": 0.000012, - "output_cost_per_token": 0.000016, + "max_input_tokens": 4097, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000006, "litellm_provider": "openai", "mode": "chat" }, + "text-embedding-3-large": { + "max_tokens": 8191, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.000000, + "litellm_provider": "openai", + "mode": "embedding" + }, + "text-embedding-3-small": { + "max_tokens": 8191, + "input_cost_per_token": 0.00000002, + "output_cost_per_token": 0.000000, + "litellm_provider": "openai", + "mode": "embedding" + }, "text-embedding-ada-002": { "max_tokens": 8191, "input_cost_per_token": 0.0000001, @@ -111,41 +182,173 @@ "litellm_provider": "openai", "mode": "embedding" }, + "text-embedding-ada-002-v2": { + "max_tokens": 8191, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "openai", + "mode": "embedding" + }, + "256-x-256/dall-e-2": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000024414, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "512-x-512/dall-e-2": { + "mode": "image_generation", + "input_cost_per_pixel": 0.0000000686, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "1024-x-1024/dall-e-2": { + "mode": "image_generation", + "input_cost_per_pixel": 0.000000019, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "hd/1024-x-1792/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000006539, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "hd/1792-x-1024/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000006539, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "hd/1024-x-1024/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000007629, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "standard/1024-x-1792/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000004359, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "standard/1792-x-1024/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000004359, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "standard/1024-x-1024/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.0000000381469, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, "azure/gpt-4-1106-preview": { "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, "litellm_provider": "azure", "mode": "chat" }, 
- "azure/gpt-4-32k": { + "azure/gpt-4-0613": { "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-4-32k-0613": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00006, + "output_cost_per_token": 0.00012, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-4-32k": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "azure", "mode": "chat" }, "azure/gpt-4": { - "max_tokens": 16385, + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, "litellm_provider": "azure", "mode": "chat" }, - "azure/gpt-3.5-turbo-16k": { + "azure/gpt-4-turbo": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-4-turbo-vision-preview": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-35-turbo-16k-0613": { "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, "litellm_provider": "azure", "mode": "chat" }, - "azure/gpt-3.5-turbo": { - "max_tokens": 4097, + "azure/gpt-35-turbo-1106": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, "litellm_provider": "azure", "mode": "chat" }, + "azure/gpt-35-turbo-16k": { + "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000004, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-35-turbo": { + "max_tokens": 4097, + "max_input_tokens": 4097, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/ada": { + "max_tokens": 8191, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "azure", + "mode": "embedding" + }, "azure/text-embedding-ada-002": { "max_tokens": 8191, "input_cost_per_token": 0.0000001, @@ -153,36 +356,52 @@ "litellm_provider": "azure", "mode": "embedding" }, - "text-davinci-003": { - "max_tokens": 4097, - "input_cost_per_token": 0.000002, - "output_cost_per_token": 0.000002, - "litellm_provider": "text-completion-openai", - "mode": "completion" + "azure/standard/1024-x-1024/dall-e-3": { + "input_cost_per_pixel": 0.0000000381469, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" }, - "text-curie-001": { - "max_tokens": 2049, - "input_cost_per_token": 0.000002, - "output_cost_per_token": 0.000002, - "litellm_provider": "text-completion-openai", - "mode": "completion" + "azure/hd/1024-x-1024/dall-e-3": { + "input_cost_per_pixel": 0.00000007629, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" }, - "text-babbage-001": { - "max_tokens": 2049, - 
"input_cost_per_token": 0.0000004, - "output_cost_per_token": 0.0000004, - "litellm_provider": "text-completion-openai", - "mode": "completion" + "azure/standard/1024-x-1792/dall-e-3": { + "input_cost_per_pixel": 0.00000004359, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" }, - "text-ada-001": { - "max_tokens": 2049, - "input_cost_per_token": 0.0000004, - "output_cost_per_token": 0.0000004, - "litellm_provider": "text-completion-openai", - "mode": "completion" + "azure/standard/1792-x-1024/dall-e-3": { + "input_cost_per_pixel": 0.00000004359, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" + }, + "azure/hd/1024-x-1792/dall-e-3": { + "input_cost_per_pixel": 0.00000006539, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" + }, + "azure/hd/1792-x-1024/dall-e-3": { + "input_cost_per_pixel": 0.00000006539, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" + }, + "azure/standard/1024-x-1024/dall-e-2": { + "input_cost_per_pixel": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" }, "babbage-002": { "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000004, "output_cost_per_token": 0.0000004, "litellm_provider": "text-completion-openai", @@ -190,6 +409,8 @@ }, "davinci-002": { "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002, "litellm_provider": "text-completion-openai", @@ -197,6 +418,8 @@ }, "gpt-3.5-turbo-instruct": { "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, "litellm_provider": "text-completion-openai", @@ -210,6 +433,33 @@ "litellm_provider": "anthropic", "mode": "chat" }, + "mistral/mistral-tiny": { + "max_tokens": 8192, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000046, + "litellm_provider": "mistral", + "mode": "chat" + }, + "mistral/mistral-small": { + "max_tokens": 8192, + "input_cost_per_token": 0.00000066, + "output_cost_per_token": 0.00000197, + "litellm_provider": "mistral", + "mode": "chat" + }, + "mistral/mistral-medium": { + "max_tokens": 8192, + "input_cost_per_token": 0.00000273, + "output_cost_per_token": 0.00000820, + "litellm_provider": "mistral", + "mode": "chat" + }, + "mistral/mistral-embed": { + "max_tokens": 8192, + "input_cost_per_token": 0.000000111, + "litellm_provider": "mistral", + "mode": "embedding" + }, "claude-instant-1.2": { "max_tokens": 100000, "max_output_tokens": 8191, @@ -248,6 +498,20 @@ "litellm_provider": "vertex_ai-text-models", "mode": "completion" }, + "text-unicorn": { + "max_tokens": 8192, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.000028, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion" + }, + "text-unicorn@001": { + "max_tokens": 8192, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.000028, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion" + }, "chat-bison": { "max_tokens": 4096, "input_cost_per_token": 0.000000125, @@ -262,6 +526,13 @@ "litellm_provider": "vertex_ai-chat-models", "mode": "chat" }, + "chat-bison@002": { + "max_tokens": 4096, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-chat-models", + "mode": 
"chat" + }, "chat-bison-32k": { "max_tokens": 32000, "input_cost_per_token": 0.000000125, @@ -287,14 +558,21 @@ "max_tokens": 2048, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "litellm_provider": "vertex_ai-chat-models", + "litellm_provider": "vertex_ai-code-text-models", "mode": "completion" }, - "code-gecko@latest": { + "code-gecko@002": { "max_tokens": 2048, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "litellm_provider": "vertex_ai-chat-models", + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion" + }, + "code-gecko": { + "max_tokens": 2048, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-code-text-models", "mode": "completion" }, "codechat-bison": { @@ -318,6 +596,67 @@ "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat" }, + "gemini-pro": { + "max_tokens": 30720, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.0000005, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat" + }, + "gemini-pro-vision": { + "max_tokens": 30720, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.0000005, + "litellm_provider": "vertex_ai-vision-models", + "mode": "chat" + }, + "textembedding-gecko": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding" + }, + "textembedding-gecko-multilingual": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding" + }, + "textembedding-gecko-multilingual@001": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding" + }, + "textembedding-gecko@001": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding" + }, + "textembedding-gecko@003": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding" + }, "palm/chat-bison": { "max_tokens": 4096, "input_cost_per_token": 0.000000125, @@ -360,6 +699,22 @@ "litellm_provider": "palm", "mode": "completion" }, + "gemini/gemini-pro": { + "max_tokens": 30720, + "max_output_tokens": 2048, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "gemini", + "mode": "chat" + }, + "gemini/gemini-pro-vision": { + "max_tokens": 30720, + "max_output_tokens": 2048, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "gemini", + "mode": "chat" + }, "command-nightly": { "max_tokens": 4096, "input_cost_per_token": 0.000015, @@ -628,6 +983,14 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "amazon.titan-embed-text-v1": { + "max_tokens": 8192, + "output_vector_size": 1536, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0, + "litellm_provider": "bedrock", + "mode": 
"embedding" + }, "anthropic.claude-v1": { "max_tokens": 100000, "max_output_tokens": 8191, @@ -636,6 +999,102 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "bedrock/us-east-1/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0455, + "output_cost_per_second": 0.0455, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02527, + "output_cost_per_second": 0.02527, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0415, + "output_cost_per_second": 0.0415, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02305, + "output_cost_per_second": 0.02305, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, "anthropic.claude-v2": { "max_tokens": 100000, "max_output_tokens": 8191, @@ -644,6 +1103,102 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "bedrock/us-east-1/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + 
"output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0455, + "output_cost_per_second": 0.0455, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02527, + "output_cost_per_second": 0.02527, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0415, + "output_cost_per_second": 0.0415, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02305, + "output_cost_per_second": 0.02305, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, "anthropic.claude-v2:1": { "max_tokens": 200000, "max_output_tokens": 8191, @@ -652,6 +1207,102 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "bedrock/us-east-1/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0455, + "output_cost_per_second": 0.0455, + 
"litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02527, + "output_cost_per_second": 0.02527, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0415, + "output_cost_per_second": 0.0415, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02305, + "output_cost_per_second": 0.02305, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, "anthropic.claude-instant-v1": { "max_tokens": 100000, "max_output_tokens": 8191, @@ -660,6 +1311,102 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "bedrock/us-east-1/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.0000008, + "output_cost_per_token": 0.0000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.011, + "output_cost_per_second": 0.011, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00611, + "output_cost_per_second": 0.00611, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.011, + "output_cost_per_second": 0.011, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00611, + "output_cost_per_second": 0.00611, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.0000008, 
+ "output_cost_per_token": 0.0000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000223, + "output_cost_per_token": 0.00000755, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.01475, + "output_cost_per_second": 0.01475, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.008194, + "output_cost_per_second": 0.008194, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000248, + "output_cost_per_token": 0.00000838, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.01635, + "output_cost_per_second": 0.01635, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.009083, + "output_cost_per_second": 0.009083, + "litellm_provider": "bedrock", + "mode": "chat" + }, "cohere.command-text-v14": { "max_tokens": 4096, "input_cost_per_token": 0.0000015, @@ -667,6 +1414,55 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "bedrock/*/1-month-commitment/cohere.command-text-v14": { + "max_tokens": 4096, + "input_cost_per_second": 0.011, + "output_cost_per_second": 0.011, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/*/6-month-commitment/cohere.command-text-v14": { + "max_tokens": 4096, + "input_cost_per_second": 0.0066027, + "output_cost_per_second": 0.0066027, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "cohere.command-light-text-v14": { + "max_tokens": 4000, + "input_cost_per_token": 0.0000003, + "output_cost_per_token": 0.0000006, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/*/1-month-commitment/cohere.command-light-text-v14": { + "max_tokens": 4096, + "input_cost_per_second": 0.001902, + "output_cost_per_second": 0.001902, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/*/6-month-commitment/cohere.command-light-text-v14": { + "max_tokens": 4096, + "input_cost_per_second": 0.0011416, + "output_cost_per_second": 0.0011416, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "cohere.embed-english-v3": { + "max_tokens": 512, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "bedrock", + "mode": "embedding" + }, + "cohere.embed-multilingual-v3": { + "max_tokens": 512, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "bedrock", + "mode": "embedding" + }, "meta.llama2-13b-chat-v1": { "max_tokens": 4096, "input_cost_per_token": 0.00000075, @@ -681,6 +1477,48 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "512-x-512/50-steps/stability.stable-diffusion-xl-v0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.018, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + 
"512-x-512/max-steps/stability.stable-diffusion-xl-v0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.036, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "max-x-max/50-steps/stability.stable-diffusion-xl-v0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.036, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "max-x-max/max-steps/stability.stable-diffusion-xl-v0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.072, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "1024-x-1024/50-steps/stability.stable-diffusion-xl-v1": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.04, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "1024-x-1024/max-steps/stability.stable-diffusion-xl-v1": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.08, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, "sagemaker/meta-textgeneration-llama-2-7b": { "max_tokens": 4096, "input_cost_per_token": 0.000, @@ -805,104 +1643,197 @@ "litellm_provider": "ollama", "mode": "completion" }, + "deepinfra/lizpreciatior/lzlv_70b_fp16_hf": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000090, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/Gryphe/MythoMax-L2-13b": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000022, + "output_cost_per_token": 0.00000022, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/mistralai/Mistral-7B-Instruct-v0.1": { + "max_tokens": 32768, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", + "mode": "chat" + }, "deepinfra/meta-llama/Llama-2-70b-chat-hf": { "max_tokens": 4096, - "input_cost_per_token": 0.000000700, - "output_cost_per_token": 0.000000950, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000090, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/cognitivecomputations/dolphin-2.6-mixtral-8x7b": { + "max_tokens": 32768, + "input_cost_per_token": 0.00000027, + "output_cost_per_token": 0.00000027, "litellm_provider": "deepinfra", "mode": "chat" }, "deepinfra/codellama/CodeLlama-34b-Instruct-hf": { "max_tokens": 4096, - "input_cost_per_token": 0.0000006, - "output_cost_per_token": 0.0000006, + "input_cost_per_token": 0.00000060, + "output_cost_per_token": 0.00000060, "litellm_provider": "deepinfra", "mode": "chat" - }, - "deepinfra/meta-llama/Llama-2-13b-chat-hf": { + }, + "deepinfra/deepinfra/mixtral": { "max_tokens": 4096, - "input_cost_per_token": 0.00000035, - "output_cost_per_token": 0.00000035, + "input_cost_per_token": 0.00000027, + "output_cost_per_token": 0.00000027, "litellm_provider": "deepinfra", - "mode": "chat" - }, - "deepinfra/meta-llama/Llama-2-7b-chat-hf": { + "mode": "completion" + }, + "deepinfra/Phind/Phind-CodeLlama-34B-v2": { "max_tokens": 4096, - "input_cost_per_token": 0.0000002, - "output_cost_per_token": 0.0000002, + "input_cost_per_token": 0.00000060, + "output_cost_per_token": 0.00000060, "litellm_provider": "deepinfra", "mode": "chat" - }, - "deepinfra/mistralai/Mistral-7B-Instruct-v0.1": { + }, + "deepinfra/mistralai/Mixtral-8x7B-Instruct-v0.1": { + "max_tokens": 32768, + "input_cost_per_token": 0.00000027, + "output_cost_per_token": 0.00000027, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + 
"deepinfra/deepinfra/airoboros-70b": { "max_tokens": 4096, - "input_cost_per_token": 0.0000002, - "output_cost_per_token": 0.0000002, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000090, "litellm_provider": "deepinfra", "mode": "chat" - }, - "deepinfra/jondurbin/airoboros-l2-70b-gpt4-1.4.1": { + }, + "deepinfra/01-ai/Yi-34B-Chat": { "max_tokens": 4096, - "input_cost_per_token": 0.0000007, - "output_cost_per_token": 0.00000095, + "input_cost_per_token": 0.00000060, + "output_cost_per_token": 0.00000060, "litellm_provider": "deepinfra", "mode": "chat" - }, - "perplexity/pplx-7b-chat": { + }, + "deepinfra/01-ai/Yi-6B-200K": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", + "mode": "completion" + }, + "deepinfra/jondurbin/airoboros-l2-70b-gpt4-1.4.1": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000090, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/meta-llama/Llama-2-13b-chat-hf": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000022, + "output_cost_per_token": 0.00000022, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/amazon/MistralLite": { + "max_tokens": 32768, + "input_cost_per_token": 0.00000020, + "output_cost_per_token": 0.00000020, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/meta-llama/Llama-2-7b-chat-hf": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/01-ai/Yi-34B-200K": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000060, + "output_cost_per_token": 0.00000060, + "litellm_provider": "deepinfra", + "mode": "completion" + }, + "deepinfra/openchat/openchat_3.5": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "perplexity/codellama-34b-instruct": { + "max_tokens": 16384, + "input_cost_per_token": 0.00000035, + "output_cost_per_token": 0.00000140, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/codellama-70b-instruct": { + "max_tokens": 16384, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000280, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/pplx-7b-chat": { "max_tokens": 8192, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, + "input_cost_per_token": 0.00000007, + "output_cost_per_token": 0.00000028, "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/pplx-70b-chat": { + }, + "perplexity/pplx-70b-chat": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000280, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/pplx-7b-online": { "max_tokens": 4096, "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, + "output_cost_per_token": 0.00000028, + "input_cost_per_request": 0.005, "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/pplx-7b-online": { + }, + "perplexity/pplx-70b-online": { "max_tokens": 4096, "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.0005, + "output_cost_per_token": 0.00000280, + "input_cost_per_request": 0.005, "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/pplx-70b-online": { + }, + "perplexity/llama-2-70b-chat": { "max_tokens": 4096, - 
"input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.0005, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000280, "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/llama-2-13b-chat": { + }, + "perplexity/mistral-7b-instruct": { "max_tokens": 4096, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, + "input_cost_per_token": 0.00000007, + "output_cost_per_token": 0.00000028, "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/llama-2-70b-chat": { - "max_tokens": 4096, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, - "litellm_provider": "perplexity", - "mode": "chat" - }, - "perplexity/mistral-7b-instruct": { - "max_tokens": 4096, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, - "litellm_provider": "perplexity", - "mode": "chat" - }, - "perplexity/replit-code-v1.5-3b": { - "max_tokens": 4096, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, - "litellm_provider": "perplexity", - "mode": "chat" - }, + }, + "perplexity/mixtral-8x7b-instruct": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000007, + "output_cost_per_token": 0.00000028, + "litellm_provider": "perplexity", + "mode": "chat" + }, "anyscale/mistralai/Mistral-7B-Instruct-v0.1": { "max_tokens": 16384, "input_cost_per_token": 0.00000015, @@ -944,5 +1875,48 @@ "output_cost_per_token": 0.000001, "litellm_provider": "anyscale", "mode": "chat" - } + }, + "cloudflare/@cf/meta/llama-2-7b-chat-fp16": { + "max_tokens": 3072, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", + "mode": "chat" + }, + "cloudflare/@cf/meta/llama-2-7b-chat-int8": { + "max_tokens": 2048, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", + "mode": "chat" + }, + "cloudflare/@cf/mistral/mistral-7b-instruct-v0.1": { + "max_tokens": 8192, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", + "mode": "chat" + }, + "cloudflare/@hf/thebloke/codellama-7b-instruct-awq": { + "max_tokens": 4096, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", + "mode": "chat" + }, + "voyage/voyage-01": { + "max_tokens": 4096, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", + "mode": "embedding" + }, + "voyage/voyage-lite-01": { + "max_tokens": 4096, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", + "mode": "embedding" + } + } diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 326544f41..a8144e9d4 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -78,7 +78,9 @@ litellm_settings: type: "redis-semantic" similarity_threshold: 0.8 redis_semantic_cache_embedding_model: azure-embedding-model - # cache: True + upperbound_key_generate_params: + max_budget: 100 + duration: "30d" # setting callback class # callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance] diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 70e602e99..661e932f3 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -636,6 +636,36 @@ async def user_api_key_auth( raise Exception( f"Only master key can be used to generate, 
delete, update or get info for new keys/users. Value of allow_user_auth={allow_user_auth}"
                )
+
+            # check if token is from litellm-ui, litellm ui makes keys to allow users to login with sso. These keys can only be used for LiteLLM UI functions
+            # sso/login, ui/login, /key functions and /user functions
+            # this will never be allowed to call /chat/completions
+            token_team = getattr(valid_token, "team_id", None)
+            if token_team is not None:
+                if token_team == "litellm-dashboard":
+                    # this token is only used for managing the ui
+                    allowed_routes = [
+                        "/sso",
+                        "/login",
+                        "/key",
+                        "/spend",
+                        "/user",
+                    ]
+                    # check if the current route startswith any of the allowed routes
+                    if (
+                        route is not None
+                        and isinstance(route, str)
+                        and any(
+                            route.startswith(allowed_route)
+                            for allowed_route in allowed_routes
+                        )
+                    ):
+                        # Do something if the current route starts with any of the allowed routes
+                        pass
+                    else:
+                        raise Exception(
+                            f"This key is made for LiteLLM UI, Tried to access route: {route}. Not allowed"
+                        )
             return UserAPIKeyAuth(api_key=api_key, **valid_token_dict)
         else:
             raise Exception(f"Invalid Key Passed to LiteLLM Proxy")
@@ -758,9 +788,10 @@ async def _PROXY_track_cost_callback(
             verbose_proxy_logger.info(
                 f"response_cost {response_cost}, for user_id {user_id}"
             )
-            if user_api_key and (
-                prisma_client is not None or custom_db_client is not None
-            ):
+            verbose_proxy_logger.debug(
+                f"user_api_key {user_api_key}, prisma_client: {prisma_client}, custom_db_client: {custom_db_client}"
+            )
+            if user_api_key is not None:
                 await update_database(
                     token=user_api_key,
                     response_cost=response_cost,
@@ -770,6 +801,8 @@
                     start_time=start_time,
                     end_time=end_time,
                 )
+            else:
+                raise Exception("User API key missing from custom callback.")
         else:
             if kwargs["stream"] != True or (
                 kwargs["stream"] == True
@@ -1361,6 +1394,26 @@ class ProxyConfig:
 proxy_config = ProxyConfig()
 
 
+def _duration_in_seconds(duration: str):
+    match = re.match(r"(\d+)([smhd]?)", duration)
+    if not match:
+        raise ValueError("Invalid duration format")
+
+    value, unit = match.groups()
+    value = int(value)
+
+    if unit == "s":
+        return value
+    elif unit == "m":
+        return value * 60
+    elif unit == "h":
+        return value * 3600
+    elif unit == "d":
+        return value * 86400
+    else:
+        raise ValueError("Unsupported duration unit")
+
+
 async def generate_key_helper_fn(
     duration: Optional[str],
     models: list,
@@ -1395,25 +1448,6 @@ async def generate_key_helper_fn(
     if token is None:
         token = f"sk-{secrets.token_urlsafe(16)}"
 
-    def _duration_in_seconds(duration: str):
-        match = re.match(r"(\d+)([smhd]?)", duration)
-        if not match:
-            raise ValueError("Invalid duration format")
-
-        value, unit = match.groups()
-        value = int(value)
-
-        if unit == "s":
-            return value
-        elif unit == "m":
-            return value * 60
-        elif unit == "h":
-            return value * 3600
-        elif unit == "d":
-            return value * 86400
-        else:
-            raise ValueError("Unsupported duration unit")
-
     if duration is None:  # allow tokens that never expire
         expires = None
     else:
@@ -2630,6 +2664,36 @@ async def generate_key_fn(
             elif key == "metadata" and value == {}:
                 setattr(data, key, litellm.default_key_generate_params.get(key, {}))
 
+    # enforce the upperbound key/generate params set on config.yaml
+    if litellm.upperbound_key_generate_params is not None:
+        for elem in data:
+            # if key is in litellm.upperbound_key_generate_params, use the min of value and litellm.upperbound_key_generate_params[key]
+            key, value = elem
@@ -2630,6 +2664,36 @@ async def generate_key_fn(
             elif key == "metadata" and value == {}:
                 setattr(data, key, litellm.default_key_generate_params.get(key, {}))
 
+    # check if user set upperbound key/generate params on config.yaml
+    if litellm.upperbound_key_generate_params is not None:
+        for elem in data:
+            # if key in litellm.upperbound_key_generate_params, use the min of value and litellm.upperbound_key_generate_params[key]
+            key, value = elem
+            if value is not None and key in litellm.upperbound_key_generate_params:
+                # if value is float/int
+                if key in [
+                    "max_budget",
+                    "max_parallel_requests",
+                    "tpm_limit",
+                    "rpm_limit",
+                ]:
+                    if value > litellm.upperbound_key_generate_params[key]:
+                        # directly compare floats/ints
+                        setattr(
+                            data, key, litellm.upperbound_key_generate_params[key]
+                        )
+                elif key == "budget_duration":
+                    # budget durations are strings like "30s", "30m", "30h", "30d";
+                    # compare the requested duration and the upperbound duration in seconds
+                    upperbound_budget_duration = _duration_in_seconds(
+                        duration=litellm.upperbound_key_generate_params[key]
+                    )
+                    user_set_budget_duration = _duration_in_seconds(duration=value)
+                    if user_set_budget_duration > upperbound_budget_duration:
+                        setattr(
+                            data, key, litellm.upperbound_key_generate_params[key]
+                        )
+
     data_json = data.json()  # type: ignore
 
     # if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users
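Note: a hedged sketch of the clamping behavior this hunk adds, using the same bounds as the `proxy_config.yaml` change earlier in this diff (the requested values are made up):

```python
import litellm

litellm.upperbound_key_generate_params = {
    "max_budget": 100,        # dollars
    "budget_duration": "30d",
}

# a /key/generate request asking for more than the bound is silently lowered:
requested_budget = 200000
bound = litellm.upperbound_key_generate_params["max_budget"]
effective_budget = min(requested_budget, bound)  # -> 100

# budget_duration is compared after converting both sides to seconds,
# so a requested "60d" would be replaced by the upperbound "30d"
```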
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index bd0301f20..e0ee05d4f 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -41,7 +41,7 @@ def test_completion_custom_provider_model_name():
             messages=messages,
             logger_fn=logger_fn,
         )
-        # Add any assertions here to check the, response
+        # Add any assertions here to check the response
         print(response)
        print(response["choices"][0]["finish_reason"])
     except litellm.Timeout as e:
diff --git a/litellm/tests/test_configs/test_config_no_auth.yaml b/litellm/tests/test_configs/test_config_no_auth.yaml
index 8441018e3..ccebe016d 100644
--- a/litellm/tests/test_configs/test_config_no_auth.yaml
+++ b/litellm/tests/test_configs/test_config_no_auth.yaml
@@ -9,21 +9,11 @@ model_list:
     api_key: os.environ/AZURE_CANADA_API_KEY
     model: azure/gpt-35-turbo
   model_name: azure-model
-- litellm_params:
-    api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
-    api_key: os.environ/AZURE_API_KEY
-    model: azure/chatgpt-v-2
-  model_name: azure-cloudflare-model
 - litellm_params:
     api_base: https://openai-france-1234.openai.azure.com
     api_key: os.environ/AZURE_FRANCE_API_KEY
     model: azure/gpt-turbo
   model_name: azure-model
-- litellm_params:
-    model: gpt-3.5-turbo
-    model_info:
-      description: this is a test openai model
-  model_name: test_openai_models
 - litellm_params:
     model: gpt-3.5-turbo
     model_info:
@@ -36,93 +26,8 @@
       description: this is a test openai model
       id: 4d1ee26c-abca-450c-8744-8e87fd6755e9
   model_name: test_openai_models
-- litellm_params:
-    model: gpt-3.5-turbo
-    model_info:
-      description: this is a test openai model
-      id: 00e19c0f-b63d-42bb-88e9-016fb0c60764
-  model_name: test_openai_models
-- litellm_params:
-    model: gpt-3.5-turbo
-    model_info:
-      description: this is a test openai model
-      id: 79fc75bf-8e1b-47d5-8d24-9365a854af03
-  model_name: test_openai_models
-- litellm_params:
-    api_base: os.environ/AZURE_API_BASE
-    api_key: os.environ/AZURE_API_KEY
-    api_version: 2023-07-01-preview
-    model: azure/azure-embedding-model
-    model_info:
-      mode: embedding
-  model_name: azure-embedding-model
-- litellm_params:
-    model: gpt-3.5-turbo
-    model_info:
-      description: this is a test openai model
-      id: 55848c55-4162-40f9-a6e2-9a722b9ef404
-  model_name: test_openai_models
-- litellm_params:
-    model: gpt-3.5-turbo
-    model_info:
-      description: this is a test openai model
-      id: 34339b1e-e030-4bcc-a531-c48559f10ce4
-  model_name: test_openai_models
-- litellm_params:
-    model: gpt-3.5-turbo
-    model_info:
-      description: this is a test openai model
-      id: f6f74e14-ac64-4403-9365-319e584dcdc5
-  model_name: test_openai_models
-- litellm_params:
-    model: gpt-3.5-turbo
-    model_info:
-      description: this is a test openai model
-      id: 9b1ef341-322c-410a-8992-903987fef439
-  model_name: test_openai_models
 - litellm_params:
     model: bedrock/amazon.titan-embed-text-v1
     model_info:
       mode: embedding
   model_name: amazon-embeddings
-- litellm_params:
-    model: sagemaker/berri-benchmarking-gpt-j-6b-fp16
-    model_info:
-      mode: embedding
-  model_name: GPT-J 6B - Sagemaker Text Embedding (Internal)
-- litellm_params:
-    model: dall-e-3
-    model_info:
-      mode: image_generation
-  model_name: dall-e-3
-- litellm_params:
-    api_base: os.environ/AZURE_SWEDEN_API_BASE
-    api_key: os.environ/AZURE_SWEDEN_API_KEY
-    api_version: 2023-12-01-preview
-    model: azure/dall-e-3-test
-    model_info:
-      mode: image_generation
-  model_name: dall-e-3
-- litellm_params:
-    api_base: os.environ/AZURE_API_BASE
-    api_key: os.environ/AZURE_API_KEY
-    api_version: 2023-06-01-preview
-    model: azure/
-    model_info:
-      mode: image_generation
-  model_name: dall-e-2
-- litellm_params:
-    api_base: os.environ/AZURE_API_BASE
-    api_key: os.environ/AZURE_API_KEY
-    api_version: 2023-07-01-preview
-    model: azure/azure-embedding-model
-    model_info:
-      base_model: text-embedding-ada-002
-      mode: embedding
-  model_name: text-embedding-ada-002
-- litellm_params:
-    model: gpt-3.5-turbo
-    model_info:
-      description: this is a test openai model
-      id: 34cb2419-7c63-44ae-a189-53f1d1ce5953
-  model_name: test_openai_models
diff --git a/litellm/tests/test_key_generate_dynamodb.py b/litellm/tests/test_key_generate_dynamodb.py
index 61d0ff6a6..e77dc7472 100644
--- a/litellm/tests/test_key_generate_dynamodb.py
+++ b/litellm/tests/test_key_generate_dynamodb.py
@@ -490,8 +490,13 @@ def test_dynamo_db_migration(custom_db_client):
     try:
 
         async def test():
+            request = GenerateKeyRequest(max_budget=1)
+            key = await generate_key_fn(request)
+            print(key)
+
+            generated_key = key.key
             bearer_token = (
-                "Bearer " + "sk-elJDL2pOEjcAuC7zD4psAg"
+                "Bearer " + generated_key
             )  # this works with ishaan's db, it's a never expiring key
 
             request = Request(scope={"type": "http"})
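Note: the dynamo test above now provisions its own key instead of depending on a long-lived hardcoded token. A sketch of that pattern, assuming `GenerateKeyRequest` is importable from `litellm.proxy._types` as these test files suggest:

```python
from litellm.proxy._types import GenerateKeyRequest
from litellm.proxy.proxy_server import generate_key_fn


async def provision_test_key() -> str:
    """Create a fresh key for a test instead of hardcoding an sk-... value."""
    key = await generate_key_fn(GenerateKeyRequest(max_budget=1))
    return "Bearer " + key.key
```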
key_info["budget_duration"] == "1m" + + print(result) + except Exception as e: + print("Got Exception", e) + pytest.fail(f"Got exception {e}") + + def test_get_bearer_token(): from litellm.proxy.proxy_server import _get_bearer_token @@ -1378,3 +1413,35 @@ async def test_user_api_key_auth_without_master_key(prisma_client): except Exception as e: print("Got Exception", e) pytest.fail(f"Got exception {e}") + + +@pytest.mark.asyncio +async def test_key_with_no_permissions(prisma_client): + """ + - create key + - get key info + - assert key_name is null + """ + setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm.proxy.proxy_server, "general_settings", {"allow_user_auth": False}) + await litellm.proxy.proxy_server.prisma_client.connect() + try: + response = await generate_key_helper_fn( + **{"duration": "1hr", "key_max_budget": 0, "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": "ishaan", "team_id": "litellm-dashboard"} # type: ignore + ) + + print(response) + key = response["token"] + + # make a /chat/completions call -> it should fail + request = Request(scope={"type": "http"}) + request._url = URL(url="/chat/completions") + + # use generated key to auth in + result = await user_api_key_auth(request=request, api_key="Bearer " + key) + print("result from user auth with new key", result) + pytest.fail(f"This should have failed!. IT's an invalid key") + except Exception as e: + print("Got Exception", e) + print(e.message) diff --git a/litellm/tests/test_parallel_request_limiter.py b/litellm/tests/test_parallel_request_limiter.py index 34dc0e3b5..528bb19d2 100644 --- a/litellm/tests/test_parallel_request_limiter.py +++ b/litellm/tests/test_parallel_request_limiter.py @@ -379,6 +379,7 @@ async def test_normal_router_tpm_limit(): ) except Exception as e: + print("Exception on test_normal_router_tpm_limit", e) assert e.status_code == 429 diff --git a/litellm/tests/test_proxy_startup.py b/litellm/tests/test_proxy_startup.py index 650e2f8a7..a846c9f4a 100644 --- a/litellm/tests/test_proxy_startup.py +++ b/litellm/tests/test_proxy_startup.py @@ -33,6 +33,11 @@ def test_proxy_gunicorn_startup_direct_config(): Test both approaches """ try: + from litellm._logging import verbose_proxy_logger, verbose_router_logger + import logging + + verbose_proxy_logger.setLevel(level=logging.DEBUG) + verbose_router_logger.setLevel(level=logging.DEBUG) filepath = os.path.dirname(os.path.abspath(__file__)) # test with worker_config = config yaml config_fp = f"{filepath}/test_configs/test_config_no_auth.yaml" @@ -48,6 +53,11 @@ def test_proxy_gunicorn_startup_direct_config(): def test_proxy_gunicorn_startup_config_dict(): try: + from litellm._logging import verbose_proxy_logger, verbose_router_logger + import logging + + verbose_proxy_logger.setLevel(level=logging.DEBUG) + verbose_router_logger.setLevel(level=logging.DEBUG) filepath = os.path.dirname(os.path.abspath(__file__)) # test with worker_config = config yaml config_fp = f"{filepath}/test_configs/test_config_no_auth.yaml" diff --git a/litellm/utils.py b/litellm/utils.py index d0aded4e5..d797a4909 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -980,12 +980,9 @@ class Logging: self.model_call_details["log_event_type"] = "post_api_call" # User Logging -> if you pass in a custom logging function - verbose_logger.debug( + print_verbose( f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n" ) - 
diff --git a/litellm/utils.py b/litellm/utils.py
index d0aded4e5..d797a4909 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -980,12 +980,9 @@ class Logging:
             self.model_call_details["log_event_type"] = "post_api_call"
 
             # User Logging -> if you pass in a custom logging function
-            verbose_logger.debug(
+            print_verbose(
                 f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n"
             )
-            verbose_logger.debug(
-                f"Logging Details Post-API Call: LiteLLM Params: {self.model_call_details}"
-            )
             if self.logger_fn and callable(self.logger_fn):
                 try:
                     self.logger_fn(
@@ -1636,34 +1633,6 @@
                             end_time=end_time,
                             print_verbose=print_verbose,
                         )
-                    if callback == "langfuse":
-                        global langFuseLogger
-                        print_verbose("reaches Async langfuse for logging!")
-                        kwargs = {}
-                        for k, v in self.model_call_details.items():
-                            if (
-                                k != "original_response"
-                            ):  # copy.deepcopy raises errors as this could be a coroutine
-                                kwargs[k] = v
-                        # this only logs streaming once, complete_streaming_response exists i.e when stream ends
-                        if self.stream:
-                            if "complete_streaming_response" not in kwargs:
-                                return
-                            else:
-                                print_verbose(
-                                    "reaches Async langfuse for streaming logging!"
-                                )
-                                result = kwargs["complete_streaming_response"]
-                        if langFuseLogger is None:
-                            langFuseLogger = LangFuseLogger()
-                        await langFuseLogger._async_log_event(
-                            kwargs=kwargs,
-                            response_obj=result,
-                            start_time=start_time,
-                            end_time=end_time,
-                            user_id=kwargs.get("user", None),
-                            print_verbose=print_verbose,
-                        )
             except:
                 print_verbose(
                     f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}"
@@ -1788,9 +1757,37 @@
                             response_obj=result,
                             kwargs=self.model_call_details,
                         )
+                elif callback == "langfuse":
+                    global langFuseLogger
+                    verbose_logger.debug("reaches langfuse for logging!")
+                    kwargs = {}
+                    for k, v in self.model_call_details.items():
+                        if (
+                            k != "original_response"
+                        ):  # copy.deepcopy raises errors as this could be a coroutine
+                            kwargs[k] = v
+                    # this only logs streaming once, complete_streaming_response exists i.e when stream ends
+                    if langFuseLogger is None or (
+                        self.langfuse_public_key != langFuseLogger.public_key
+                        and self.langfuse_secret != langFuseLogger.secret_key
+                    ):
+                        langFuseLogger = LangFuseLogger(
+                            langfuse_public_key=self.langfuse_public_key,
+                            langfuse_secret=self.langfuse_secret,
+                        )
+                    langFuseLogger.log_event(
+                        start_time=start_time,
+                        end_time=end_time,
+                        response_obj=None,
+                        user_id=kwargs.get("user", None),
+                        print_verbose=print_verbose,
+                        status_message=str(exception),
+                        level="ERROR",
+                        kwargs=self.model_call_details,
+                    )
             except Exception as e:
                 print_verbose(
-                    f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {traceback.format_exc()}"
+                    f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {str(e)}"
                 )
                 print_verbose(
                     f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}"
                 )
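Note: with this hunk, provider failures are forwarded to `LangFuseLogger.log_event(..., level="ERROR")` instead of being dropped. A minimal sketch of exercising that path, assuming Langfuse credentials are set in the environment:

```python
import litellm

litellm.failure_callback = ["langfuse"]  # route errors to Langfuse as ERROR events

try:
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        api_key="sk-invalid",  # deliberately bad key to force a failure
    )
except Exception:
    pass  # str(exception) is sent to Langfuse as the status_message
```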
@@ -3860,6 +3857,8 @@ def get_optional_params(
         and custom_llm_provider != "text-completion-openai"
         and custom_llm_provider != "azure"
         and custom_llm_provider != "vertex_ai"
+        and custom_llm_provider != "anyscale"
+        and custom_llm_provider != "together_ai"
     ):
         if custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat":
             # ollama actually supports json output
@@ -3878,11 +3877,6 @@
                 optional_params[
                     "functions_unsupported_model"
                 ] = non_default_params.pop("functions")
-        elif (
-            custom_llm_provider == "anyscale"
-            and model == "mistralai/Mistral-7B-Instruct-v0.1"
-        ):  # anyscale just supports function calling with mistral
-            pass
         elif (
             litellm.add_function_to_prompt
         ):  # if user opts to add it to prompt instead
@@ -4095,6 +4089,8 @@
             "top_p",
             "stop",
             "frequency_penalty",
+            "tools",
+            "tool_choice",
         ]
         _check_valid_arg(supported_params=supported_params)
 
@@ -4112,6 +4108,10 @@ def get_optional_params(
             ] = frequency_penalty  # https://docs.together.ai/reference/inference
         if stop is not None:
             optional_params["stop"] = stop
+        if tools is not None:
+            optional_params["tools"] = tools
+        if tool_choice is not None:
+            optional_params["tool_choice"] = tool_choice
     elif custom_llm_provider == "ai21":
         ## check if unsupported param passed in
         supported_params = [
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index b6ded001c..4c28bdbe8 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -156,8 +156,8 @@
         "max_tokens": 4097,
         "max_input_tokens": 4097,
         "max_output_tokens": 4096,
-        "input_cost_per_token": 0.000012,
-        "output_cost_per_token": 0.000016,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000006,
         "litellm_provider": "openai",
         "mode": "chat"
     },
diff --git a/pyproject.toml b/pyproject.toml
index 256624417..17d80ae8e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.22.4"
+version = "1.22.8"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -69,7 +69,7 @@
 requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.22.4"
+version = "1.22.8"
 version_files = [
     "pyproject.toml:^version"
 ]
diff --git a/requirements.txt b/requirements.txt
index 6b82c993a..3ace5872a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,7 +13,7 @@
 redisvl==0.0.7 # semantic caching
 numpy==1.24.3 # semantic caching
 prisma==0.11.0 # for db
 mangum==0.17.0 # for aws lambda functions
-google-generativeai==0.1.0 # for vertex ai calls
+google-generativeai==0.3.2 # for vertex ai calls
 async_generator==1.10.0 # for async ollama calls
 traceloop-sdk==0.5.3 # for open telemetry logging
 langfuse>=2.6.3 # for langfuse self-hosted logging
diff --git a/ui/litellm-dashboard/src/app/layout.tsx b/ui/litellm-dashboard/src/app/layout.tsx
index 3314e4780..a04a0d66e 100644
--- a/ui/litellm-dashboard/src/app/layout.tsx
+++ b/ui/litellm-dashboard/src/app/layout.tsx
@@ -5,8 +5,8 @@ import "./globals.css";
 const inter = Inter({ subsets: ["latin"] });
 
 export const metadata: Metadata = {
-  title: "Create Next App",
-  description: "Generated by create next app",
+  title: "🚅 LiteLLM",
+  description: "LiteLLM Proxy Admin UI",
 };
 
 export default function RootLayout({