diff --git a/.circleci/config.yml b/.circleci/config.yml
index c1224159a..9a29ed07c 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -80,7 +80,7 @@ jobs:
           command: |
             pwd
             ls
-            python -m pytest -vv litellm/tests/ -x --junitxml=test-results/junit.xml --durations=5
+            python -m pytest -vv -s litellm/tests/ -x --junitxml=test-results/junit.xml --durations=5
           no_output_timeout: 120m
 
   # Store test results
diff --git a/.gitignore b/.gitignore
index 730898a5b..00cd35c5b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,3 +43,4 @@ ui/litellm-dashboard/package-lock.json
 deploy/charts/litellm-helm/*.tgz
 deploy/charts/litellm-helm/charts/*
 deploy/charts/*.tgz
+litellm/proxy/vertex_key.json
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 8ab4e3e92..8978e0d1a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -10,6 +10,12 @@ repos:
         exclude: ^litellm/tests/|^litellm/proxy/proxy_cli.py|^litellm/integrations/|^litellm/proxy/tests/
         additional_dependencies: [flake8-print]
         files: litellm/.*\.py
+- repo: local
+  hooks:
+  - id: check-files-match
+    name: Check if files match
+    entry: python3 ci_cd/check_files_match.py
+    language: system
 - repo: local
   hooks:
   - id: mypy
diff --git a/ci_cd/check_files_match.py b/ci_cd/check_files_match.py
new file mode 100644
index 000000000..18b6cf792
--- /dev/null
+++ b/ci_cd/check_files_match.py
@@ -0,0 +1,32 @@
+import sys
+import filecmp
+import shutil
+
+
+def main(argv=None):
+    print(
+        "Comparing model_prices_and_context_window.json and litellm/model_prices_and_context_window_backup.json files... checking if they match."
+    )
+
+    file1 = "model_prices_and_context_window.json"
+    file2 = "litellm/model_prices_and_context_window_backup.json"
+
+    cmp_result = filecmp.cmp(file1, file2, shallow=False)
+
+    if cmp_result:
+        print(f"Passed! Files {file1} and {file2} match.")
+        return 0
+    else:
+        print(
+            f"Failed! Files {file1} and {file2} do not match. Copying content from {file1} to {file2}."
+        )
+        copy_content(file1, file2)
+        return 1
+
+
+def copy_content(source, destination):
+    shutil.copy2(source, destination)
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/docs/my-website/docs/observability/langfuse_integration.md b/docs/my-website/docs/observability/langfuse_integration.md
index 3de426ec3..ec8f3a029 100644
--- a/docs/my-website/docs/observability/langfuse_integration.md
+++ b/docs/my-website/docs/observability/langfuse_integration.md
@@ -122,6 +122,7 @@ response = completion(
         "generation_id": "gen-id22",   # set langfuse Generation ID
         "trace_id": "trace-id22",      # set langfuse Trace ID
         "trace_user_id": "user-id2",   # set langfuse Trace User ID
+        "session_id": "session-1",     # set langfuse Session ID
     },
 )
diff --git a/docs/my-website/docs/proxy/virtual_keys.md b/docs/my-website/docs/proxy/virtual_keys.md
index dd5edc6da..c51bfc0ac 100644
--- a/docs/my-website/docs/proxy/virtual_keys.md
+++ b/docs/my-website/docs/proxy/virtual_keys.md
@@ -352,6 +352,22 @@ Request Params:
 }
 ```
 
+## Upperbound /key/generate params
+Use this if you need to control the upper bound that users can set for `max_budget`, `budget_duration`, or any other `key/generate` param per key.
+
+Set `litellm_settings:upperbound_key_generate_params`:
+```yaml
+litellm_settings:
+  upperbound_key_generate_params:
+    max_budget: 100 # upperbound of $100, for all /key/generate requests
+    duration: "30d" # upperbound of 30 days for all /key/generate requests
+```
+
+**Expected Behavior**
+
+- Send a `/key/generate` request with `max_budget=200`
+- The key will be created with `max_budget=100`, since 100 is the upper bound
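+
+For example, a request that exceeds the bound (a sketch; the request shape follows the other `/key/generate` examples in these docs, and the local proxy URL and master key value are assumptions):
+
+```shell
+curl --location 'http://0.0.0.0:8000/key/generate' \
+--header 'Authorization: Bearer sk-1234' \
+--header 'Content-Type: application/json' \
+--data '{"max_budget": 200}'
+```
+
+The returned key is created with `max_budget: 100`, the configured upper bound.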
+
 ## Default /key/generate params
 Use this, if you need to control the default `max_budget` or any `key/generate` param per key.
diff --git a/litellm/__init__.py b/litellm/__init__.py
index 3f2a1e4b4..26b761c64 100644
--- a/litellm/__init__.py
+++ b/litellm/__init__.py
@@ -146,6 +146,7 @@ suppress_debug_info = False
 dynamodb_table_name: Optional[str] = None
 s3_callback_params: Optional[Dict] = None
 default_key_generate_params: Optional[Dict] = None
+upperbound_key_generate_params: Optional[Dict] = None
 default_team_settings: Optional[List] = None
 #### RELIABILITY ####
 request_timeout: Optional[float] = 6000
diff --git a/litellm/integrations/langfuse.py b/litellm/integrations/langfuse.py
index cd37a93a3..82de33366 100644
--- a/litellm/integrations/langfuse.py
+++ b/litellm/integrations/langfuse.py
@@ -55,8 +55,21 @@ class LangFuseLogger:
         else:
             self.upstream_langfuse = None
 
+    # def log_error(kwargs, response_obj, start_time, end_time):
+    #     generation = trace.generation(
+    #         level="ERROR",  # can be any of DEBUG, DEFAULT, WARNING or ERROR
+    #         status_message="error",  # can be any string (e.g. stringified stack trace or error body)
+    #     )
     def log_event(
-        self, kwargs, response_obj, start_time, end_time, user_id, print_verbose
+        self,
+        kwargs,
+        response_obj,
+        start_time,
+        end_time,
+        user_id,
+        print_verbose,
+        level="DEFAULT",
+        status_message=None,
     ):
         # Method definition
@@ -84,37 +97,49 @@ class LangFuseLogger:
             pass
 
         # end of processing langfuse ########################
-        if kwargs.get("call_type", None) == "embedding" or isinstance(
-            response_obj, litellm.EmbeddingResponse
+        if (
+            level == "ERROR"
+            and status_message is not None
+            and isinstance(status_message, str)
+        ):
+            input = prompt
+            output = status_message
+        elif response_obj is not None and (
+            kwargs.get("call_type", None) == "embedding"
+            or isinstance(response_obj, litellm.EmbeddingResponse)
         ):
             input = prompt
             output = response_obj["data"]
-        else:
+        elif response_obj is not None:
             input = prompt
             output = response_obj["choices"][0]["message"].json()
-        print_verbose(f"OUTPUT IN LANGFUSE: {output}; original: {response_obj}")
-        self._log_langfuse_v2(
-            user_id,
-            metadata,
-            output,
-            start_time,
-            end_time,
-            kwargs,
-            optional_params,
-            input,
-            response_obj,
-            print_verbose,
-        ) if self._is_langfuse_v2() else self._log_langfuse_v1(
-            user_id,
-            metadata,
-            output,
-            start_time,
-            end_time,
-            kwargs,
-            optional_params,
-            input,
-            response_obj,
-        )
+        print(f"OUTPUT IN LANGFUSE: {output}; original: {response_obj}")
+        if self._is_langfuse_v2():
+            self._log_langfuse_v2(
+                user_id,
+                metadata,
+                output,
+                start_time,
+                end_time,
+                kwargs,
+                optional_params,
+                input,
+                response_obj,
+                level,
+                print_verbose,
+            )
+        elif response_obj is not None:
+            self._log_langfuse_v1(
+                user_id,
+                metadata,
+                output,
+                start_time,
+                end_time,
+                kwargs,
+                optional_params,
+                input,
+                response_obj,
+            )
 
         self.Langfuse.flush()
         print_verbose(
@@ -123,15 +148,15 @@
             verbose_logger.info(f"Langfuse Layer Logging - logging success")
         except:
             traceback.print_exc()
-            print_verbose(f"Langfuse Layer Error - {traceback.format_exc()}")
{traceback.format_exc()}") + print(f"Langfuse Layer Error - {traceback.format_exc()}") pass async def _async_log_event( self, kwargs, response_obj, start_time, end_time, user_id, print_verbose ): - self.log_event( - kwargs, response_obj, start_time, end_time, user_id, print_verbose - ) + """ + TODO: support async calls when langfuse is truly async + """ def _is_langfuse_v2(self): import langfuse @@ -193,56 +218,78 @@ class LangFuseLogger: optional_params, input, response_obj, + level, print_verbose, ): import langfuse - tags = [] - supports_tags = Version(langfuse.version.__version__) >= Version("2.6.3") - supports_costs = Version(langfuse.version.__version__) >= Version("2.7.3") + try: + tags = [] + supports_tags = Version(langfuse.version.__version__) >= Version("2.6.3") + supports_costs = Version(langfuse.version.__version__) >= Version("2.7.3") - print_verbose(f"Langfuse Layer Logging - logging to langfuse v2 ") + print_verbose(f"Langfuse Layer Logging - logging to langfuse v2 ") - generation_name = metadata.get("generation_name", None) - if generation_name is None: - # just log `litellm-{call_type}` as the generation name - generation_name = f"litellm-{kwargs.get('call_type', 'completion')}" + generation_name = metadata.get("generation_name", None) + if generation_name is None: + # just log `litellm-{call_type}` as the generation name + generation_name = f"litellm-{kwargs.get('call_type', 'completion')}" - trace_params = { - "name": generation_name, - "input": input, - "output": output, - "user_id": metadata.get("trace_user_id", user_id), - "id": metadata.get("trace_id", None), - } - cost = kwargs["response_cost"] - print_verbose(f"trace: {cost}") - if supports_tags: - for key, value in metadata.items(): - tags.append(f"{key}:{value}") - if "cache_hit" in kwargs: - tags.append(f"cache_hit:{kwargs['cache_hit']}") - trace_params.update({"tags": tags}) + trace_params = { + "name": generation_name, + "input": input, + "user_id": metadata.get("trace_user_id", user_id), + "id": metadata.get("trace_id", None), + "session_id": metadata.get("session_id", None), + } - trace = self.Langfuse.trace(**trace_params) + if level == "ERROR": + trace_params["status_message"] = output + else: + trace_params["output"] = output - # get generation_id - generation_id = None - if response_obj.get("id", None) is not None: - generation_id = litellm.utils.get_logging_id(start_time, response_obj) - trace.generation( - name=generation_name, - id=metadata.get("generation_id", generation_id), - startTime=start_time, - endTime=end_time, - model=kwargs["model"], - modelParameters=optional_params, - input=input, - output=output, - usage={ - "prompt_tokens": response_obj["usage"]["prompt_tokens"], - "completion_tokens": response_obj["usage"]["completion_tokens"], - "total_cost": cost if supports_costs else None, - }, - metadata=metadata, - ) + cost = kwargs.get("response_cost", None) + print_verbose(f"trace: {cost}") + if supports_tags: + for key, value in metadata.items(): + tags.append(f"{key}:{value}") + if "cache_hit" in kwargs: + tags.append(f"cache_hit:{kwargs['cache_hit']}") + trace_params.update({"tags": tags}) + + trace = self.Langfuse.trace(**trace_params) + + if level == "ERROR": + trace.generation( + level="ERROR", # can be any of DEBUG, DEFAULT, WARNING or ERROR + status_message=output, # can be any string (e.g. 
+                    status_message=output,  # can be any string (e.g. stringified stack trace or error body)
+                )
+                print(f"SUCCESSFULLY LOGGED ERROR")
+            else:
+                # get generation_id
+                generation_id = None
+                if (
+                    response_obj is not None
+                    and response_obj.get("id", None) is not None
+                ):
+                    generation_id = litellm.utils.get_logging_id(
+                        start_time, response_obj
+                    )
+                trace.generation(
+                    name=generation_name,
+                    id=metadata.get("generation_id", generation_id),
+                    startTime=start_time,
+                    endTime=end_time,
+                    model=kwargs["model"],
+                    modelParameters=optional_params,
+                    input=input,
+                    output=output,
+                    usage={
+                        "prompt_tokens": response_obj["usage"]["prompt_tokens"],
+                        "completion_tokens": response_obj["usage"]["completion_tokens"],
+                        "total_cost": cost if supports_costs else None,
+                    },
+                    metadata=metadata,
+                )
+        except Exception as e:
+            print(f"Langfuse Layer Error - {traceback.format_exc()}")
diff --git a/litellm/llms/ollama.py b/litellm/llms/ollama.py
index d0bc24af4..9339deb78 100644
--- a/litellm/llms/ollama.py
+++ b/litellm/llms/ollama.py
@@ -146,7 +146,15 @@ def get_ollama_response(
             optional_params[k] = v
 
     stream = optional_params.pop("stream", False)
-    data = {"model": model, "prompt": prompt, "options": optional_params}
+    format = optional_params.pop("format", None)
+    data = {
+        "model": model,
+        "prompt": prompt,
+        "options": optional_params,
+        "stream": stream,
+    }
+    if format is not None:
+        data["format"] = format
 
     ## LOGGING
     logging_obj.pre_call(
diff --git a/litellm/llms/ollama_chat.py b/litellm/llms/ollama_chat.py
index 95ff8dfaa..0311931b1 100644
--- a/litellm/llms/ollama_chat.py
+++ b/litellm/llms/ollama_chat.py
@@ -146,7 +146,15 @@
             optional_params[k] = v
 
     stream = optional_params.pop("stream", False)
-    data = {"model": model, "messages": messages, "options": optional_params}
+    format = optional_params.pop("format", None)
+    data = {
+        "model": model,
+        "messages": messages,
+        "options": optional_params,
+        "stream": stream,
+    }
+    if format is not None:
+        data["format"] = format
 
     ## LOGGING
     logging_obj.pre_call(
         input=None,
@@ -320,11 +328,15 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
             model_response["choices"][0]["message"] = message
         else:
             model_response["choices"][0]["message"] = response_json["message"]
+
         model_response["created"] = int(time.time())
-        model_response["model"] = "ollama/" + data["model"]
+        model_response["model"] = "ollama_chat/" + data["model"]
         prompt_tokens = response_json.get("prompt_eval_count", litellm.token_counter(messages=data["messages"]))  # type: ignore
         completion_tokens = response_json.get(
-            "eval_count", litellm.token_counter(text=response_json["message"])
+            "eval_count",
+            litellm.token_counter(
+                text=response_json["message"]["content"], count_response_tokens=True
+            ),
         )
         model_response["usage"] = litellm.Usage(
             prompt_tokens=prompt_tokens,
diff --git a/litellm/main.py b/litellm/main.py
index bc33a69e5..384dadc32 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -263,6 +263,7 @@ async def acompletion(
             or custom_llm_provider == "ollama"
             or custom_llm_provider == "ollama_chat"
             or custom_llm_provider == "vertex_ai"
+            or custom_llm_provider in litellm.openai_compatible_providers
         ):  # currently implemented aiohttp calls for just azure, openai, hf, ollama, vertex ai soon all.
             init_response = await loop.run_in_executor(None, func_with_context)
             if isinstance(init_response, dict) or isinstance(
@@ -3319,6 +3320,10 @@ async def ahealth_check(
             response = {}  # args like remaining ratelimit etc.
return response except Exception as e: + if model not in litellm.model_cost and mode is None: + raise Exception( + "Missing `mode`. Set the `mode` for the model - https://docs.litellm.ai/docs/proxy/health#embedding-models" + ) return {"error": str(e)} diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json index 454b2504a..4c28bdbe8 100644 --- a/litellm/model_prices_and_context_window_backup.json +++ b/litellm/model_prices_and_context_window_backup.json @@ -1,6 +1,8 @@ { "gpt-4": { - "max_tokens": 8192, + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, "litellm_provider": "openai", @@ -8,6 +10,8 @@ }, "gpt-4-0314": { "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, "litellm_provider": "openai", @@ -15,6 +19,8 @@ }, "gpt-4-0613": { "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, "litellm_provider": "openai", @@ -22,6 +28,8 @@ }, "gpt-4-32k": { "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "openai", @@ -29,6 +37,8 @@ }, "gpt-4-32k-0314": { "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "openai", @@ -36,6 +46,8 @@ }, "gpt-4-32k-0613": { "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "openai", @@ -43,6 +55,17 @@ }, "gpt-4-1106-preview": { "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "openai", + "mode": "chat" + }, + "gpt-4-0125-preview": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, "litellm_provider": "openai", @@ -50,6 +73,17 @@ }, "gpt-4-vision-preview": { "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "openai", + "mode": "chat" + }, + "gpt-4-1106-vision-preview": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, "litellm_provider": "openai", @@ -57,6 +91,8 @@ }, "gpt-3.5-turbo": { "max_tokens": 4097, + "max_input_tokens": 4097, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, "litellm_provider": "openai", @@ -64,6 +100,8 @@ }, "gpt-3.5-turbo-0301": { "max_tokens": 4097, + "max_input_tokens": 4097, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, "litellm_provider": "openai", @@ -71,6 +109,8 @@ }, "gpt-3.5-turbo-0613": { "max_tokens": 4097, + "max_input_tokens": 4097, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, "litellm_provider": "openai", @@ -78,13 +118,26 @@ }, "gpt-3.5-turbo-1106": { "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, "input_cost_per_token": 
0.0000010, "output_cost_per_token": 0.0000020, "litellm_provider": "openai", "mode": "chat" }, + "gpt-3.5-turbo-0125": { + "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000005, + "output_cost_per_token": 0.0000015, + "litellm_provider": "openai", + "mode": "chat" + }, "gpt-3.5-turbo-16k": { "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, "litellm_provider": "openai", @@ -92,6 +145,8 @@ }, "gpt-3.5-turbo-16k-0613": { "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, "litellm_provider": "openai", @@ -99,11 +154,27 @@ }, "ft:gpt-3.5-turbo": { "max_tokens": 4097, - "input_cost_per_token": 0.000012, - "output_cost_per_token": 0.000016, + "max_input_tokens": 4097, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000006, "litellm_provider": "openai", "mode": "chat" }, + "text-embedding-3-large": { + "max_tokens": 8191, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.000000, + "litellm_provider": "openai", + "mode": "embedding" + }, + "text-embedding-3-small": { + "max_tokens": 8191, + "input_cost_per_token": 0.00000002, + "output_cost_per_token": 0.000000, + "litellm_provider": "openai", + "mode": "embedding" + }, "text-embedding-ada-002": { "max_tokens": 8191, "input_cost_per_token": 0.0000001, @@ -111,41 +182,173 @@ "litellm_provider": "openai", "mode": "embedding" }, + "text-embedding-ada-002-v2": { + "max_tokens": 8191, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "openai", + "mode": "embedding" + }, + "256-x-256/dall-e-2": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000024414, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "512-x-512/dall-e-2": { + "mode": "image_generation", + "input_cost_per_pixel": 0.0000000686, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "1024-x-1024/dall-e-2": { + "mode": "image_generation", + "input_cost_per_pixel": 0.000000019, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "hd/1024-x-1792/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000006539, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "hd/1792-x-1024/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000006539, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "hd/1024-x-1024/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000007629, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "standard/1024-x-1792/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000004359, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "standard/1792-x-1024/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.00000004359, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, + "standard/1024-x-1024/dall-e-3": { + "mode": "image_generation", + "input_cost_per_pixel": 0.0000000381469, + "output_cost_per_pixel": 0.0, + "litellm_provider": "openai" + }, "azure/gpt-4-1106-preview": { "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, "input_cost_per_token": 0.00001, "output_cost_per_token": 0.00003, "litellm_provider": "azure", "mode": "chat" }, 
- "azure/gpt-4-32k": { + "azure/gpt-4-0613": { "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-4-32k-0613": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00006, + "output_cost_per_token": 0.00012, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-4-32k": { + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 4096, "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "azure", "mode": "chat" }, "azure/gpt-4": { - "max_tokens": 16385, + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.00003, "output_cost_per_token": 0.00006, "litellm_provider": "azure", "mode": "chat" }, - "azure/gpt-3.5-turbo-16k": { + "azure/gpt-4-turbo": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-4-turbo-vision-preview": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-35-turbo-16k-0613": { "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, "input_cost_per_token": 0.000003, "output_cost_per_token": 0.000004, "litellm_provider": "azure", "mode": "chat" }, - "azure/gpt-3.5-turbo": { - "max_tokens": 4097, + "azure/gpt-35-turbo-1106": { + "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, "litellm_provider": "azure", "mode": "chat" }, + "azure/gpt-35-turbo-16k": { + "max_tokens": 16385, + "max_input_tokens": 16385, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000004, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/gpt-35-turbo": { + "max_tokens": 4097, + "max_input_tokens": 4097, + "max_output_tokens": 4096, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, + "litellm_provider": "azure", + "mode": "chat" + }, + "azure/ada": { + "max_tokens": 8191, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "azure", + "mode": "embedding" + }, "azure/text-embedding-ada-002": { "max_tokens": 8191, "input_cost_per_token": 0.0000001, @@ -153,36 +356,52 @@ "litellm_provider": "azure", "mode": "embedding" }, - "text-davinci-003": { - "max_tokens": 4097, - "input_cost_per_token": 0.000002, - "output_cost_per_token": 0.000002, - "litellm_provider": "text-completion-openai", - "mode": "completion" + "azure/standard/1024-x-1024/dall-e-3": { + "input_cost_per_pixel": 0.0000000381469, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" }, - "text-curie-001": { - "max_tokens": 2049, - "input_cost_per_token": 0.000002, - "output_cost_per_token": 0.000002, - "litellm_provider": "text-completion-openai", - "mode": "completion" + "azure/hd/1024-x-1024/dall-e-3": { + "input_cost_per_pixel": 0.00000007629, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" }, - "text-babbage-001": { - "max_tokens": 2049, - 
"input_cost_per_token": 0.0000004, - "output_cost_per_token": 0.0000004, - "litellm_provider": "text-completion-openai", - "mode": "completion" + "azure/standard/1024-x-1792/dall-e-3": { + "input_cost_per_pixel": 0.00000004359, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" }, - "text-ada-001": { - "max_tokens": 2049, - "input_cost_per_token": 0.0000004, - "output_cost_per_token": 0.0000004, - "litellm_provider": "text-completion-openai", - "mode": "completion" + "azure/standard/1792-x-1024/dall-e-3": { + "input_cost_per_pixel": 0.00000004359, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" + }, + "azure/hd/1024-x-1792/dall-e-3": { + "input_cost_per_pixel": 0.00000006539, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" + }, + "azure/hd/1792-x-1024/dall-e-3": { + "input_cost_per_pixel": 0.00000006539, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" + }, + "azure/standard/1024-x-1024/dall-e-2": { + "input_cost_per_pixel": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "azure", + "mode": "image_generation" }, "babbage-002": { "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000004, "output_cost_per_token": 0.0000004, "litellm_provider": "text-completion-openai", @@ -190,6 +409,8 @@ }, "davinci-002": { "max_tokens": 16384, + "max_input_tokens": 16384, + "max_output_tokens": 4096, "input_cost_per_token": 0.000002, "output_cost_per_token": 0.000002, "litellm_provider": "text-completion-openai", @@ -197,6 +418,8 @@ }, "gpt-3.5-turbo-instruct": { "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 4096, "input_cost_per_token": 0.0000015, "output_cost_per_token": 0.000002, "litellm_provider": "text-completion-openai", @@ -210,6 +433,33 @@ "litellm_provider": "anthropic", "mode": "chat" }, + "mistral/mistral-tiny": { + "max_tokens": 8192, + "input_cost_per_token": 0.00000015, + "output_cost_per_token": 0.00000046, + "litellm_provider": "mistral", + "mode": "chat" + }, + "mistral/mistral-small": { + "max_tokens": 8192, + "input_cost_per_token": 0.00000066, + "output_cost_per_token": 0.00000197, + "litellm_provider": "mistral", + "mode": "chat" + }, + "mistral/mistral-medium": { + "max_tokens": 8192, + "input_cost_per_token": 0.00000273, + "output_cost_per_token": 0.00000820, + "litellm_provider": "mistral", + "mode": "chat" + }, + "mistral/mistral-embed": { + "max_tokens": 8192, + "input_cost_per_token": 0.000000111, + "litellm_provider": "mistral", + "mode": "embedding" + }, "claude-instant-1.2": { "max_tokens": 100000, "max_output_tokens": 8191, @@ -248,6 +498,20 @@ "litellm_provider": "vertex_ai-text-models", "mode": "completion" }, + "text-unicorn": { + "max_tokens": 8192, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.000028, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion" + }, + "text-unicorn@001": { + "max_tokens": 8192, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.000028, + "litellm_provider": "vertex_ai-text-models", + "mode": "completion" + }, "chat-bison": { "max_tokens": 4096, "input_cost_per_token": 0.000000125, @@ -262,6 +526,13 @@ "litellm_provider": "vertex_ai-chat-models", "mode": "chat" }, + "chat-bison@002": { + "max_tokens": 4096, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-chat-models", + "mode": 
"chat" + }, "chat-bison-32k": { "max_tokens": 32000, "input_cost_per_token": 0.000000125, @@ -287,14 +558,21 @@ "max_tokens": 2048, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "litellm_provider": "vertex_ai-chat-models", + "litellm_provider": "vertex_ai-code-text-models", "mode": "completion" }, - "code-gecko@latest": { + "code-gecko@002": { "max_tokens": 2048, "input_cost_per_token": 0.000000125, "output_cost_per_token": 0.000000125, - "litellm_provider": "vertex_ai-chat-models", + "litellm_provider": "vertex_ai-code-text-models", + "mode": "completion" + }, + "code-gecko": { + "max_tokens": 2048, + "input_cost_per_token": 0.000000125, + "output_cost_per_token": 0.000000125, + "litellm_provider": "vertex_ai-code-text-models", "mode": "completion" }, "codechat-bison": { @@ -318,6 +596,67 @@ "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat" }, + "gemini-pro": { + "max_tokens": 30720, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.0000005, + "litellm_provider": "vertex_ai-language-models", + "mode": "chat" + }, + "gemini-pro-vision": { + "max_tokens": 30720, + "max_output_tokens": 2048, + "input_cost_per_token": 0.00000025, + "output_cost_per_token": 0.0000005, + "litellm_provider": "vertex_ai-vision-models", + "mode": "chat" + }, + "textembedding-gecko": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding" + }, + "textembedding-gecko-multilingual": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding" + }, + "textembedding-gecko-multilingual@001": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding" + }, + "textembedding-gecko@001": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding" + }, + "textembedding-gecko@003": { + "max_tokens": 3072, + "max_input_tokens": 3072, + "output_vector_size": 768, + "input_cost_per_token": 0.00000000625, + "output_cost_per_token": 0, + "litellm_provider": "vertex_ai-embedding-models", + "mode": "embedding" + }, "palm/chat-bison": { "max_tokens": 4096, "input_cost_per_token": 0.000000125, @@ -360,6 +699,22 @@ "litellm_provider": "palm", "mode": "completion" }, + "gemini/gemini-pro": { + "max_tokens": 30720, + "max_output_tokens": 2048, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "gemini", + "mode": "chat" + }, + "gemini/gemini-pro-vision": { + "max_tokens": 30720, + "max_output_tokens": 2048, + "input_cost_per_token": 0.0, + "output_cost_per_token": 0.0, + "litellm_provider": "gemini", + "mode": "chat" + }, "command-nightly": { "max_tokens": 4096, "input_cost_per_token": 0.000015, @@ -628,6 +983,14 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "amazon.titan-embed-text-v1": { + "max_tokens": 8192, + "output_vector_size": 1536, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.0, + "litellm_provider": "bedrock", + "mode": 
"embedding" + }, "anthropic.claude-v1": { "max_tokens": 100000, "max_output_tokens": 8191, @@ -636,6 +999,102 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "bedrock/us-east-1/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0455, + "output_cost_per_second": 0.0455, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02527, + "output_cost_per_second": 0.02527, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0415, + "output_cost_per_second": 0.0415, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02305, + "output_cost_per_second": 0.02305, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, "anthropic.claude-v2": { "max_tokens": 100000, "max_output_tokens": 8191, @@ -644,6 +1103,102 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "bedrock/us-east-1/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + 
"output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0455, + "output_cost_per_second": 0.0455, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02527, + "output_cost_per_second": 0.02527, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0415, + "output_cost_per_second": 0.0415, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02305, + "output_cost_per_second": 0.02305, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-v2": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, "anthropic.claude-v2:1": { "max_tokens": 200000, "max_output_tokens": 8191, @@ -652,6 +1207,102 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "bedrock/us-east-1/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0455, + "output_cost_per_second": 0.0455, + 
"litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02527, + "output_cost_per_second": 0.02527, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0415, + "output_cost_per_second": 0.0415, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.02305, + "output_cost_per_second": 0.02305, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.0175, + "output_cost_per_second": 0.0175, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-v2:1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00972, + "output_cost_per_second": 0.00972, + "litellm_provider": "bedrock", + "mode": "chat" + }, "anthropic.claude-instant-v1": { "max_tokens": 100000, "max_output_tokens": 8191, @@ -660,6 +1311,102 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "bedrock/us-east-1/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.0000008, + "output_cost_per_token": 0.0000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/1-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.011, + "output_cost_per_second": 0.011, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-east-1/6-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00611, + "output_cost_per_second": 0.00611, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/1-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.011, + "output_cost_per_second": 0.011, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/6-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.00611, + "output_cost_per_second": 0.00611, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/us-west-2/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.0000008, 
+ "output_cost_per_token": 0.0000024, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000223, + "output_cost_per_token": 0.00000755, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/1-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.01475, + "output_cost_per_second": 0.01475, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/ap-northeast-1/6-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.008194, + "output_cost_per_second": 0.008194, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_token": 0.00000248, + "output_cost_per_token": 0.00000838, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/1-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.01635, + "output_cost_per_second": 0.01635, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/eu-central-1/6-month-commitment/anthropic.claude-instant-v1": { + "max_tokens": 100000, + "max_output_tokens": 8191, + "input_cost_per_second": 0.009083, + "output_cost_per_second": 0.009083, + "litellm_provider": "bedrock", + "mode": "chat" + }, "cohere.command-text-v14": { "max_tokens": 4096, "input_cost_per_token": 0.0000015, @@ -667,6 +1414,55 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "bedrock/*/1-month-commitment/cohere.command-text-v14": { + "max_tokens": 4096, + "input_cost_per_second": 0.011, + "output_cost_per_second": 0.011, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/*/6-month-commitment/cohere.command-text-v14": { + "max_tokens": 4096, + "input_cost_per_second": 0.0066027, + "output_cost_per_second": 0.0066027, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "cohere.command-light-text-v14": { + "max_tokens": 4000, + "input_cost_per_token": 0.0000003, + "output_cost_per_token": 0.0000006, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/*/1-month-commitment/cohere.command-light-text-v14": { + "max_tokens": 4096, + "input_cost_per_second": 0.001902, + "output_cost_per_second": 0.001902, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "bedrock/*/6-month-commitment/cohere.command-light-text-v14": { + "max_tokens": 4096, + "input_cost_per_second": 0.0011416, + "output_cost_per_second": 0.0011416, + "litellm_provider": "bedrock", + "mode": "chat" + }, + "cohere.embed-english-v3": { + "max_tokens": 512, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "bedrock", + "mode": "embedding" + }, + "cohere.embed-multilingual-v3": { + "max_tokens": 512, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "bedrock", + "mode": "embedding" + }, "meta.llama2-13b-chat-v1": { "max_tokens": 4096, "input_cost_per_token": 0.00000075, @@ -681,6 +1477,48 @@ "litellm_provider": "bedrock", "mode": "chat" }, + "512-x-512/50-steps/stability.stable-diffusion-xl-v0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.018, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + 
"512-x-512/max-steps/stability.stable-diffusion-xl-v0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.036, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "max-x-max/50-steps/stability.stable-diffusion-xl-v0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.036, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "max-x-max/max-steps/stability.stable-diffusion-xl-v0": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.072, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "1024-x-1024/50-steps/stability.stable-diffusion-xl-v1": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.04, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, + "1024-x-1024/max-steps/stability.stable-diffusion-xl-v1": { + "max_tokens": 77, + "max_input_tokens": 77, + "output_cost_per_image": 0.08, + "litellm_provider": "bedrock", + "mode": "image_generation" + }, "sagemaker/meta-textgeneration-llama-2-7b": { "max_tokens": 4096, "input_cost_per_token": 0.000, @@ -805,104 +1643,197 @@ "litellm_provider": "ollama", "mode": "completion" }, + "deepinfra/lizpreciatior/lzlv_70b_fp16_hf": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000090, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/Gryphe/MythoMax-L2-13b": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000022, + "output_cost_per_token": 0.00000022, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/mistralai/Mistral-7B-Instruct-v0.1": { + "max_tokens": 32768, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", + "mode": "chat" + }, "deepinfra/meta-llama/Llama-2-70b-chat-hf": { "max_tokens": 4096, - "input_cost_per_token": 0.000000700, - "output_cost_per_token": 0.000000950, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000090, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/cognitivecomputations/dolphin-2.6-mixtral-8x7b": { + "max_tokens": 32768, + "input_cost_per_token": 0.00000027, + "output_cost_per_token": 0.00000027, "litellm_provider": "deepinfra", "mode": "chat" }, "deepinfra/codellama/CodeLlama-34b-Instruct-hf": { "max_tokens": 4096, - "input_cost_per_token": 0.0000006, - "output_cost_per_token": 0.0000006, + "input_cost_per_token": 0.00000060, + "output_cost_per_token": 0.00000060, "litellm_provider": "deepinfra", "mode": "chat" - }, - "deepinfra/meta-llama/Llama-2-13b-chat-hf": { + }, + "deepinfra/deepinfra/mixtral": { "max_tokens": 4096, - "input_cost_per_token": 0.00000035, - "output_cost_per_token": 0.00000035, + "input_cost_per_token": 0.00000027, + "output_cost_per_token": 0.00000027, "litellm_provider": "deepinfra", - "mode": "chat" - }, - "deepinfra/meta-llama/Llama-2-7b-chat-hf": { + "mode": "completion" + }, + "deepinfra/Phind/Phind-CodeLlama-34B-v2": { "max_tokens": 4096, - "input_cost_per_token": 0.0000002, - "output_cost_per_token": 0.0000002, + "input_cost_per_token": 0.00000060, + "output_cost_per_token": 0.00000060, "litellm_provider": "deepinfra", "mode": "chat" - }, - "deepinfra/mistralai/Mistral-7B-Instruct-v0.1": { + }, + "deepinfra/mistralai/Mixtral-8x7B-Instruct-v0.1": { + "max_tokens": 32768, + "input_cost_per_token": 0.00000027, + "output_cost_per_token": 0.00000027, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + 
"deepinfra/deepinfra/airoboros-70b": { "max_tokens": 4096, - "input_cost_per_token": 0.0000002, - "output_cost_per_token": 0.0000002, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000090, "litellm_provider": "deepinfra", "mode": "chat" - }, - "deepinfra/jondurbin/airoboros-l2-70b-gpt4-1.4.1": { + }, + "deepinfra/01-ai/Yi-34B-Chat": { "max_tokens": 4096, - "input_cost_per_token": 0.0000007, - "output_cost_per_token": 0.00000095, + "input_cost_per_token": 0.00000060, + "output_cost_per_token": 0.00000060, "litellm_provider": "deepinfra", "mode": "chat" - }, - "perplexity/pplx-7b-chat": { + }, + "deepinfra/01-ai/Yi-6B-200K": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", + "mode": "completion" + }, + "deepinfra/jondurbin/airoboros-l2-70b-gpt4-1.4.1": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000090, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/meta-llama/Llama-2-13b-chat-hf": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000022, + "output_cost_per_token": 0.00000022, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/amazon/MistralLite": { + "max_tokens": 32768, + "input_cost_per_token": 0.00000020, + "output_cost_per_token": 0.00000020, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/meta-llama/Llama-2-7b-chat-hf": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "deepinfra/01-ai/Yi-34B-200K": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000060, + "output_cost_per_token": 0.00000060, + "litellm_provider": "deepinfra", + "mode": "completion" + }, + "deepinfra/openchat/openchat_3.5": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000013, + "output_cost_per_token": 0.00000013, + "litellm_provider": "deepinfra", + "mode": "chat" + }, + "perplexity/codellama-34b-instruct": { + "max_tokens": 16384, + "input_cost_per_token": 0.00000035, + "output_cost_per_token": 0.00000140, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/codellama-70b-instruct": { + "max_tokens": 16384, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000280, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/pplx-7b-chat": { "max_tokens": 8192, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, + "input_cost_per_token": 0.00000007, + "output_cost_per_token": 0.00000028, "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/pplx-70b-chat": { + }, + "perplexity/pplx-70b-chat": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000280, + "litellm_provider": "perplexity", + "mode": "chat" + }, + "perplexity/pplx-7b-online": { "max_tokens": 4096, "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, + "output_cost_per_token": 0.00000028, + "input_cost_per_request": 0.005, "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/pplx-7b-online": { + }, + "perplexity/pplx-70b-online": { "max_tokens": 4096, "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.0005, + "output_cost_per_token": 0.00000280, + "input_cost_per_request": 0.005, "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/pplx-70b-online": { + }, + "perplexity/llama-2-70b-chat": { "max_tokens": 4096, - 
"input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.0005, + "input_cost_per_token": 0.00000070, + "output_cost_per_token": 0.00000280, "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/llama-2-13b-chat": { + }, + "perplexity/mistral-7b-instruct": { "max_tokens": 4096, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, + "input_cost_per_token": 0.00000007, + "output_cost_per_token": 0.00000028, "litellm_provider": "perplexity", "mode": "chat" - }, - "perplexity/llama-2-70b-chat": { - "max_tokens": 4096, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, - "litellm_provider": "perplexity", - "mode": "chat" - }, - "perplexity/mistral-7b-instruct": { - "max_tokens": 4096, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, - "litellm_provider": "perplexity", - "mode": "chat" - }, - "perplexity/replit-code-v1.5-3b": { - "max_tokens": 4096, - "input_cost_per_token": 0.0000000, - "output_cost_per_token": 0.000000, - "litellm_provider": "perplexity", - "mode": "chat" - }, + }, + "perplexity/mixtral-8x7b-instruct": { + "max_tokens": 4096, + "input_cost_per_token": 0.00000007, + "output_cost_per_token": 0.00000028, + "litellm_provider": "perplexity", + "mode": "chat" + }, "anyscale/mistralai/Mistral-7B-Instruct-v0.1": { "max_tokens": 16384, "input_cost_per_token": 0.00000015, @@ -944,5 +1875,48 @@ "output_cost_per_token": 0.000001, "litellm_provider": "anyscale", "mode": "chat" - } + }, + "cloudflare/@cf/meta/llama-2-7b-chat-fp16": { + "max_tokens": 3072, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", + "mode": "chat" + }, + "cloudflare/@cf/meta/llama-2-7b-chat-int8": { + "max_tokens": 2048, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", + "mode": "chat" + }, + "cloudflare/@cf/mistral/mistral-7b-instruct-v0.1": { + "max_tokens": 8192, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", + "mode": "chat" + }, + "cloudflare/@hf/thebloke/codellama-7b-instruct-awq": { + "max_tokens": 4096, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 0.000001923, + "litellm_provider": "cloudflare", + "mode": "chat" + }, + "voyage/voyage-01": { + "max_tokens": 4096, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", + "mode": "embedding" + }, + "voyage/voyage-lite-01": { + "max_tokens": 4096, + "input_cost_per_token": 0.0000001, + "output_cost_per_token": 0.000000, + "litellm_provider": "voyage", + "mode": "embedding" + } + } diff --git a/litellm/proxy/proxy_config.yaml b/litellm/proxy/proxy_config.yaml index 326544f41..a8144e9d4 100644 --- a/litellm/proxy/proxy_config.yaml +++ b/litellm/proxy/proxy_config.yaml @@ -78,7 +78,9 @@ litellm_settings: type: "redis-semantic" similarity_threshold: 0.8 redis_semantic_cache_embedding_model: azure-embedding-model - # cache: True + upperbound_key_generate_params: + max_budget: 100 + duration: "30d" # setting callback class # callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance] diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 70e602e99..661e932f3 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -636,6 +636,36 @@ async def user_api_key_auth( raise Exception( f"Only master key can be used to generate, 
delete, update or get info for new keys/users. Value of allow_user_auth={allow_user_auth}"
                )
+
+            # check if token is from litellm-ui, litellm ui makes keys to allow users to login with sso. These keys can only be used for LiteLLM UI functions
+            # sso/login, ui/login, /key functions and /user functions
+            # this will never be allowed to call /chat/completions
+            token_team = getattr(valid_token, "team_id", None)
+            if token_team is not None:
+                if token_team == "litellm-dashboard":
+                    # this token is only used for managing the ui
+                    allowed_routes = [
+                        "/sso",
+                        "/login",
+                        "/key",
+                        "/spend",
+                        "/user",
+                    ]
+                    # check if the current route startswith any of the allowed routes
+                    if (
+                        route is not None
+                        and isinstance(route, str)
+                        and any(
+                            route.startswith(allowed_route)
+                            for allowed_route in allowed_routes
+                        )
+                    ):
+                        # Do something if the current route starts with any of the allowed routes
+                        pass
+                    else:
+                        raise Exception(
+                            f"This key is made for LiteLLM UI, Tried to access route: {route}. Not allowed"
+                        )
             return UserAPIKeyAuth(api_key=api_key, **valid_token_dict)
         else:
             raise Exception(f"Invalid Key Passed to LiteLLM Proxy")
@@ -758,9 +788,10 @@ async def _PROXY_track_cost_callback(
             verbose_proxy_logger.info(
                 f"response_cost {response_cost}, for user_id {user_id}"
             )
-            if user_api_key and (
-                prisma_client is not None or custom_db_client is not None
-            ):
+            verbose_proxy_logger.debug(
+                f"user_api_key {user_api_key}, prisma_client: {prisma_client}, custom_db_client: {custom_db_client}"
+            )
+            if user_api_key is not None:
                 await update_database(
                     token=user_api_key,
                     response_cost=response_cost,
@@ -770,6 +801,8 @@
                     start_time=start_time,
                     end_time=end_time,
                 )
+            else:
+                raise Exception("User API key missing from custom callback.")
         else:
             if kwargs["stream"] != True or (
                 kwargs["stream"] == True
@@ -1361,6 +1394,26 @@ class ProxyConfig:
 proxy_config = ProxyConfig()
 
 
+def _duration_in_seconds(duration: str):
+    match = re.match(r"(\d+)([smhd]?)", duration)
+    if not match:
+        raise ValueError("Invalid duration format")
+
+    value, unit = match.groups()
+    value = int(value)
+
+    if unit == "s":
+        return value
+    elif unit == "m":
+        return value * 60
+    elif unit == "h":
+        return value * 3600
+    elif unit == "d":
+        return value * 86400
+    else:
+        raise ValueError("Unsupported duration unit")
+
+
 async def generate_key_helper_fn(
     duration: Optional[str],
     models: list,
@@ -1395,25 +1448,6 @@ async def generate_key_helper_fn(
     if token is None:
         token = f"sk-{secrets.token_urlsafe(16)}"
 
-    def _duration_in_seconds(duration: str):
-        match = re.match(r"(\d+)([smhd]?)", duration)
-        if not match:
-            raise ValueError("Invalid duration format")
-
-        value, unit = match.groups()
-        value = int(value)
-
-        if unit == "s":
-            return value
-        elif unit == "m":
-            return value * 60
-        elif unit == "h":
-            return value * 3600
-        elif unit == "d":
-            return value * 86400
-        else:
-            raise ValueError("Unsupported duration unit")
-
     if duration is None:  # allow tokens that never expire
         expires = None
     else:
@@ -2630,6 +2664,36 @@ async def generate_key_fn(
             elif key == "metadata" and value == {}:
                 setattr(data, key, litellm.default_key_generate_params.get(key, {}))
 
+    # enforce the upperbound key/generate params set on config.yaml
+    if litellm.upperbound_key_generate_params is not None:
+        for elem in data:
+            # if key is in litellm.upperbound_key_generate_params, use the min of value and litellm.upperbound_key_generate_params[key]
+            key, value = elem
@@ -2630,6 +2664,36 @@ async def generate_key_fn(
             elif key == "metadata" and value == {}:
                 setattr(data, key, litellm.default_key_generate_params.get(key, {}))
 
+    # check if user set upperbound key/generate params on config.yaml
+    if litellm.upperbound_key_generate_params is not None:
+        for elem in data:
+            # if key in litellm.upperbound_key_generate_params, use the min of value and litellm.upperbound_key_generate_params[key]
+            key, value = elem
+            if value is not None and key in litellm.upperbound_key_generate_params:
+                # if value is float/int
+                if key in [
+                    "max_budget",
+                    "max_parallel_requests",
+                    "tpm_limit",
+                    "rpm_limit",
+                ]:
+                    if value > litellm.upperbound_key_generate_params[key]:
+                        # directly compare floats/ints
+                        setattr(
+                            data, key, litellm.upperbound_key_generate_params[key]
+                        )
+                elif key == "budget_duration":
+                    # budget durations are strings like "30s", "30m", "30h", "30d";
+                    # compare the requested duration and the upperbound duration in seconds
+                    upperbound_budget_duration = _duration_in_seconds(
+                        duration=litellm.upperbound_key_generate_params[key]
+                    )
+                    user_set_budget_duration = _duration_in_seconds(duration=value)
+                    if user_set_budget_duration > upperbound_budget_duration:
+                        setattr(
+                            data, key, litellm.upperbound_key_generate_params[key]
+                        )
+
     data_json = data.json()  # type: ignore
 
     # if we get max_budget passed to /key/generate, then use it as key_max_budget. Since generate_key_helper_fn is used to make new users
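Note: a hedged sketch of the clamping behavior this hunk adds, using the same bounds as the `proxy_config.yaml` change earlier in this diff (the requested values are made up):

```python
import litellm

litellm.upperbound_key_generate_params = {
    "max_budget": 100,        # dollars
    "budget_duration": "30d",
}

# a /key/generate request asking for more than the bound is silently lowered:
requested_budget = 200000
bound = litellm.upperbound_key_generate_params["max_budget"]
effective_budget = min(requested_budget, bound)  # -> 100

# budget_duration is compared after converting both sides to seconds,
# so a requested "60d" would be replaced by the upperbound "30d"
```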
diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py
index bd0301f20..e0ee05d4f 100644
--- a/litellm/tests/test_completion.py
+++ b/litellm/tests/test_completion.py
@@ -41,7 +41,7 @@ def test_completion_custom_provider_model_name():
             messages=messages,
             logger_fn=logger_fn,
         )
-        # Add any assertions here to check the, response
+        # Add any assertions here to check the response
         print(response)
        print(response["choices"][0]["finish_reason"])
     except litellm.Timeout as e:
diff --git a/litellm/tests/test_configs/test_config_no_auth.yaml b/litellm/tests/test_configs/test_config_no_auth.yaml
index 8441018e3..ccebe016d 100644
--- a/litellm/tests/test_configs/test_config_no_auth.yaml
+++ b/litellm/tests/test_configs/test_config_no_auth.yaml
@@ -9,21 +9,11 @@ model_list:
     api_key: os.environ/AZURE_CANADA_API_KEY
     model: azure/gpt-35-turbo
   model_name: azure-model
-- litellm_params:
-    api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
-    api_key: os.environ/AZURE_API_KEY
-    model: azure/chatgpt-v-2
-  model_name: azure-cloudflare-model
 - litellm_params:
     api_base: https://openai-france-1234.openai.azure.com
     api_key: os.environ/AZURE_FRANCE_API_KEY
     model: azure/gpt-turbo
   model_name: azure-model
-- litellm_params:
-    model: gpt-3.5-turbo
-    model_info:
-      description: this is a test openai model
-  model_name: test_openai_models
 - litellm_params:
     model: gpt-3.5-turbo
     model_info:
@@ -36,93 +26,8 @@
       description: this is a test openai model
       id: 4d1ee26c-abca-450c-8744-8e87fd6755e9
   model_name: test_openai_models
-- litellm_params:
-    model: gpt-3.5-turbo
-    model_info:
-      description: this is a test openai model
-      id: 00e19c0f-b63d-42bb-88e9-016fb0c60764
-  model_name: test_openai_models
-- litellm_params:
-    model: gpt-3.5-turbo
-    model_info:
-      description: this is a test openai model
-      id: 79fc75bf-8e1b-47d5-8d24-9365a854af03
-  model_name: test_openai_models
-- litellm_params:
-    api_base: os.environ/AZURE_API_BASE
-    api_key: os.environ/AZURE_API_KEY
-    api_version: 2023-07-01-preview
-    model: azure/azure-embedding-model
-    model_info:
-      mode: embedding
-  model_name: azure-embedding-model
-- litellm_params:
-    model: gpt-3.5-turbo
-    model_info:
-      description: this is a test openai model
-      id: 55848c55-4162-40f9-a6e2-9a722b9ef404
-  model_name: test_openai_models
-- litellm_params:
-    model: gpt-3.5-turbo
-    model_info:
-      description: this is a test openai model
-      id: 34339b1e-e030-4bcc-a531-c48559f10ce4
-  model_name: test_openai_models
-- litellm_params:
-    model: gpt-3.5-turbo
-    model_info:
-      description: this is a test openai model
-      id: f6f74e14-ac64-4403-9365-319e584dcdc5
-  model_name: test_openai_models
-- litellm_params:
-    model: gpt-3.5-turbo
-    model_info:
-      description: this is a test openai model
-      id: 9b1ef341-322c-410a-8992-903987fef439
-  model_name: test_openai_models
 - litellm_params:
     model: bedrock/amazon.titan-embed-text-v1
     model_info:
       mode: embedding
   model_name: amazon-embeddings
-- litellm_params:
-    model: sagemaker/berri-benchmarking-gpt-j-6b-fp16
-    model_info:
-      mode: embedding
-  model_name: GPT-J 6B - Sagemaker Text Embedding (Internal)
-- litellm_params:
-    model: dall-e-3
-    model_info:
-      mode: image_generation
-  model_name: dall-e-3
-- litellm_params:
-    api_base: os.environ/AZURE_SWEDEN_API_BASE
-    api_key: os.environ/AZURE_SWEDEN_API_KEY
-    api_version: 2023-12-01-preview
-    model: azure/dall-e-3-test
-    model_info:
-      mode: image_generation
-  model_name: dall-e-3
-- litellm_params:
-    api_base: os.environ/AZURE_API_BASE
-    api_key: os.environ/AZURE_API_KEY
-    api_version: 2023-06-01-preview
-    model: azure/
-    model_info:
-      mode: image_generation
-  model_name: dall-e-2
-- litellm_params:
-    api_base: os.environ/AZURE_API_BASE
-    api_key: os.environ/AZURE_API_KEY
-    api_version: 2023-07-01-preview
-    model: azure/azure-embedding-model
-    model_info:
-      base_model: text-embedding-ada-002
-      mode: embedding
-  model_name: text-embedding-ada-002
-- litellm_params:
-    model: gpt-3.5-turbo
-    model_info:
-      description: this is a test openai model
-      id: 34cb2419-7c63-44ae-a189-53f1d1ce5953
-  model_name: test_openai_models
diff --git a/litellm/tests/test_key_generate_dynamodb.py b/litellm/tests/test_key_generate_dynamodb.py
index 61d0ff6a6..e77dc7472 100644
--- a/litellm/tests/test_key_generate_dynamodb.py
+++ b/litellm/tests/test_key_generate_dynamodb.py
@@ -490,8 +490,13 @@ def test_dynamo_db_migration(custom_db_client):
     try:
 
         async def test():
+            request = GenerateKeyRequest(max_budget=1)
+            key = await generate_key_fn(request)
+            print(key)
+
+            generated_key = key.key
             bearer_token = (
-                "Bearer " + "sk-elJDL2pOEjcAuC7zD4psAg"
+                "Bearer " + generated_key
             )  # this works with ishaan's db, it's a never expiring key
 
             request = Request(scope={"type": "http"})
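Note: the dynamo test above now provisions its own key instead of depending on a long-lived hardcoded token. A sketch of that pattern, assuming `GenerateKeyRequest` is importable from `litellm.proxy._types` as these test files suggest:

```python
from litellm.proxy._types import GenerateKeyRequest
from litellm.proxy.proxy_server import generate_key_fn


async def provision_test_key() -> str:
    """Create a fresh key for a test instead of hardcoding an sk-... value."""
    key = await generate_key_fn(GenerateKeyRequest(max_budget=1))
    return "Bearer " + key.key
```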
key_info["budget_duration"] == "1m" + + print(result) + except Exception as e: + print("Got Exception", e) + pytest.fail(f"Got exception {e}") + + def test_get_bearer_token(): from litellm.proxy.proxy_server import _get_bearer_token @@ -1378,3 +1413,35 @@ async def test_user_api_key_auth_without_master_key(prisma_client): except Exception as e: print("Got Exception", e) pytest.fail(f"Got exception {e}") + + +@pytest.mark.asyncio +async def test_key_with_no_permissions(prisma_client): + """ + - create key + - get key info + - assert key_name is null + """ + setattr(litellm.proxy.proxy_server, "prisma_client", prisma_client) + setattr(litellm.proxy.proxy_server, "master_key", "sk-1234") + setattr(litellm.proxy.proxy_server, "general_settings", {"allow_user_auth": False}) + await litellm.proxy.proxy_server.prisma_client.connect() + try: + response = await generate_key_helper_fn( + **{"duration": "1hr", "key_max_budget": 0, "models": [], "aliases": {}, "config": {}, "spend": 0, "user_id": "ishaan", "team_id": "litellm-dashboard"} # type: ignore + ) + + print(response) + key = response["token"] + + # make a /chat/completions call -> it should fail + request = Request(scope={"type": "http"}) + request._url = URL(url="/chat/completions") + + # use generated key to auth in + result = await user_api_key_auth(request=request, api_key="Bearer " + key) + print("result from user auth with new key", result) + pytest.fail(f"This should have failed!. IT's an invalid key") + except Exception as e: + print("Got Exception", e) + print(e.message) diff --git a/litellm/tests/test_parallel_request_limiter.py b/litellm/tests/test_parallel_request_limiter.py index 34dc0e3b5..528bb19d2 100644 --- a/litellm/tests/test_parallel_request_limiter.py +++ b/litellm/tests/test_parallel_request_limiter.py @@ -379,6 +379,7 @@ async def test_normal_router_tpm_limit(): ) except Exception as e: + print("Exception on test_normal_router_tpm_limit", e) assert e.status_code == 429 diff --git a/litellm/tests/test_proxy_startup.py b/litellm/tests/test_proxy_startup.py index 650e2f8a7..a846c9f4a 100644 --- a/litellm/tests/test_proxy_startup.py +++ b/litellm/tests/test_proxy_startup.py @@ -33,6 +33,11 @@ def test_proxy_gunicorn_startup_direct_config(): Test both approaches """ try: + from litellm._logging import verbose_proxy_logger, verbose_router_logger + import logging + + verbose_proxy_logger.setLevel(level=logging.DEBUG) + verbose_router_logger.setLevel(level=logging.DEBUG) filepath = os.path.dirname(os.path.abspath(__file__)) # test with worker_config = config yaml config_fp = f"{filepath}/test_configs/test_config_no_auth.yaml" @@ -48,6 +53,11 @@ def test_proxy_gunicorn_startup_direct_config(): def test_proxy_gunicorn_startup_config_dict(): try: + from litellm._logging import verbose_proxy_logger, verbose_router_logger + import logging + + verbose_proxy_logger.setLevel(level=logging.DEBUG) + verbose_router_logger.setLevel(level=logging.DEBUG) filepath = os.path.dirname(os.path.abspath(__file__)) # test with worker_config = config yaml config_fp = f"{filepath}/test_configs/test_config_no_auth.yaml" diff --git a/litellm/utils.py b/litellm/utils.py index d0aded4e5..d797a4909 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -980,12 +980,9 @@ class Logging: self.model_call_details["log_event_type"] = "post_api_call" # User Logging -> if you pass in a custom logging function - verbose_logger.debug( + print_verbose( f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n" ) - 
diff --git a/litellm/utils.py b/litellm/utils.py
index d0aded4e5..d797a4909 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -980,12 +980,9 @@ class Logging:
             self.model_call_details["log_event_type"] = "post_api_call"
 
             # User Logging -> if you pass in a custom logging function
-            verbose_logger.debug(
+            print_verbose(
                 f"RAW RESPONSE:\n{self.model_call_details.get('original_response', self.model_call_details)}\n\n"
             )
-            verbose_logger.debug(
-                f"Logging Details Post-API Call: LiteLLM Params: {self.model_call_details}"
-            )
             if self.logger_fn and callable(self.logger_fn):
                 try:
                     self.logger_fn(
@@ -1636,34 +1633,6 @@
                             end_time=end_time,
                             print_verbose=print_verbose,
                         )
-                    if callback == "langfuse":
-                        global langFuseLogger
-                        print_verbose("reaches Async langfuse for logging!")
-                        kwargs = {}
-                        for k, v in self.model_call_details.items():
-                            if (
-                                k != "original_response"
-                            ):  # copy.deepcopy raises errors as this could be a coroutine
-                                kwargs[k] = v
-                        # this only logs streaming once, complete_streaming_response exists i.e when stream ends
-                        if self.stream:
-                            if "complete_streaming_response" not in kwargs:
-                                return
-                            else:
-                                print_verbose(
-                                    "reaches Async langfuse for streaming logging!"
-                                )
-                                result = kwargs["complete_streaming_response"]
-                        if langFuseLogger is None:
-                            langFuseLogger = LangFuseLogger()
-                        await langFuseLogger._async_log_event(
-                            kwargs=kwargs,
-                            response_obj=result,
-                            start_time=start_time,
-                            end_time=end_time,
-                            user_id=kwargs.get("user", None),
-                            print_verbose=print_verbose,
-                        )
             except:
                 print_verbose(
                     f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while success logging {traceback.format_exc()}"
@@ -1788,9 +1757,37 @@
                             response_obj=result,
                             kwargs=self.model_call_details,
                         )
+                elif callback == "langfuse":
+                    global langFuseLogger
+                    verbose_logger.debug("reaches langfuse for logging!")
+                    kwargs = {}
+                    for k, v in self.model_call_details.items():
+                        if (
+                            k != "original_response"
+                        ):  # copy.deepcopy raises errors as this could be a coroutine
+                            kwargs[k] = v
+                    # this only logs streaming once, complete_streaming_response exists i.e when stream ends
+                    if langFuseLogger is None or (
+                        self.langfuse_public_key != langFuseLogger.public_key
+                        and self.langfuse_secret != langFuseLogger.secret_key
+                    ):
+                        langFuseLogger = LangFuseLogger(
+                            langfuse_public_key=self.langfuse_public_key,
+                            langfuse_secret=self.langfuse_secret,
+                        )
+                    langFuseLogger.log_event(
+                        start_time=start_time,
+                        end_time=end_time,
+                        response_obj=None,
+                        user_id=kwargs.get("user", None),
+                        print_verbose=print_verbose,
+                        status_message=str(exception),
+                        level="ERROR",
+                        kwargs=self.model_call_details,
+                    )
             except Exception as e:
                 print_verbose(
-                    f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {traceback.format_exc()}"
+                    f"LiteLLM.LoggingError: [Non-Blocking] Exception occurred while failure logging with integrations {str(e)}"
                 )
                 print_verbose(
                     f"LiteLLM.Logging: is sentry capture exception initialized {capture_exception}"
                 )
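Note: with this hunk, provider failures are forwarded to `LangFuseLogger.log_event(..., level="ERROR")` instead of being dropped. A minimal sketch of exercising that path, assuming Langfuse credentials are set in the environment:

```python
import litellm

litellm.failure_callback = ["langfuse"]  # route errors to Langfuse as ERROR events

try:
    litellm.completion(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "hi"}],
        api_key="sk-invalid",  # deliberately bad key to force a failure
    )
except Exception:
    pass  # str(exception) is sent to Langfuse as the status_message
```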
@@ -3860,6 +3857,8 @@ def get_optional_params(
         and custom_llm_provider != "text-completion-openai"
         and custom_llm_provider != "azure"
         and custom_llm_provider != "vertex_ai"
+        and custom_llm_provider != "anyscale"
+        and custom_llm_provider != "together_ai"
     ):
         if custom_llm_provider == "ollama" or custom_llm_provider == "ollama_chat":
             # ollama actually supports json output
@@ -3878,11 +3877,6 @@
                 optional_params[
                     "functions_unsupported_model"
                 ] = non_default_params.pop("functions")
-        elif (
-            custom_llm_provider == "anyscale"
-            and model == "mistralai/Mistral-7B-Instruct-v0.1"
-        ):  # anyscale just supports function calling with mistral
-            pass
         elif (
             litellm.add_function_to_prompt
         ):  # if user opts to add it to prompt instead
@@ -4095,6 +4089,8 @@
             "top_p",
             "stop",
             "frequency_penalty",
+            "tools",
+            "tool_choice",
         ]
         _check_valid_arg(supported_params=supported_params)
 
@@ -4112,6 +4108,10 @@ def get_optional_params(
             ] = frequency_penalty  # https://docs.together.ai/reference/inference
         if stop is not None:
             optional_params["stop"] = stop
+        if tools is not None:
+            optional_params["tools"] = tools
+        if tool_choice is not None:
+            optional_params["tool_choice"] = tool_choice
     elif custom_llm_provider == "ai21":
         ## check if unsupported param passed in
         supported_params = [
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index b6ded001c..4c28bdbe8 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -156,8 +156,8 @@
         "max_tokens": 4097,
         "max_input_tokens": 4097,
         "max_output_tokens": 4096,
-        "input_cost_per_token": 0.000012,
-        "output_cost_per_token": 0.000016,
+        "input_cost_per_token": 0.000003,
+        "output_cost_per_token": 0.000006,
         "litellm_provider": "openai",
         "mode": "chat"
     },
diff --git a/pyproject.toml b/pyproject.toml
index 256624417..17d80ae8e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.22.4"
+version = "1.22.8"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT"
@@ -69,7 +69,7 @@
 requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.22.4"
+version = "1.22.8"
 version_files = [
     "pyproject.toml:^version"
 ]
diff --git a/requirements.txt b/requirements.txt
index 6b82c993a..3ace5872a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,7 +13,7 @@
 redisvl==0.0.7 # semantic caching
 numpy==1.24.3 # semantic caching
 prisma==0.11.0 # for db
 mangum==0.17.0 # for aws lambda functions
-google-generativeai==0.1.0 # for vertex ai calls
+google-generativeai==0.3.2 # for vertex ai calls
 async_generator==1.10.0 # for async ollama calls
 traceloop-sdk==0.5.3 # for open telemetry logging
 langfuse>=2.6.3 # for langfuse self-hosted logging
diff --git a/ui/litellm-dashboard/src/app/layout.tsx b/ui/litellm-dashboard/src/app/layout.tsx
index 3314e4780..a04a0d66e 100644
--- a/ui/litellm-dashboard/src/app/layout.tsx
+++ b/ui/litellm-dashboard/src/app/layout.tsx
@@ -5,8 +5,8 @@ import "./globals.css";
 const inter = Inter({ subsets: ["latin"] });
 
 export const metadata: Metadata = {
-  title: "Create Next App",
-  description: "Generated by create next app",
+  title: "🚅 LiteLLM",
+  description: "LiteLLM Proxy Admin UI",
 };
 
 export default function RootLayout({