Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-26 19:24:27 +00:00)
Merge branch 'main' into litellm_redis_team_object

Commit c2086300b7
34 changed files with 1182 additions and 232 deletions
@@ -1,9 +1,11 @@
 model_list:
-  - model_name: "*" # all requests where model not in your config go to this deployment
+  - model_name: "test-model"
     litellm_params:
-      model: "openai/*" # passes our validation check that a real provider is given
-      api_key: ""
+      model: "openai/text-embedding-ada-002"
+  - model_name: "my-custom-model"
+    litellm_params:
+      model: "my-custom-llm/my-model"
 
 litellm_settings:
-  cache: True
+  custom_provider_map:
+  - {"provider": "my-custom-llm", "custom_handler": custom_handler.my_custom_llm}
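The custom_provider_map entry ties the "my-custom-llm" prefix used in model names to a handler object exported by custom_handler.py (added below). As a rough sketch of the lookup this config implies, with hypothetical helper names, a "provider/model" string is split on the first slash and the provider part is matched against the map:

custom_provider_map = [
    {"provider": "my-custom-llm", "custom_handler": "custom_handler.my_custom_llm"},
]

def resolve_handler(model: str):
    # Split "my-custom-llm/my-model" into provider and model name,
    # then look the provider up in the map loaded from the config.
    provider, _, model_name = model.partition("/")
    for entry in custom_provider_map:
        if entry["provider"] == provider:
            return entry["custom_handler"], model_name
    return None, model  # not a custom provider; fall back to built-in routing

print(resolve_handler("my-custom-llm/my-model"))
# ('custom_handler.my_custom_llm', 'my-model')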
litellm/proxy/custom_handler.py (new file, 21 lines)
@@ -0,0 +1,21 @@
+import litellm
+from litellm import CustomLLM, completion, get_llm_provider
+
+
+class MyCustomLLM(CustomLLM):
+    def completion(self, *args, **kwargs) -> litellm.ModelResponse:
+        return litellm.completion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "Hello world"}],
+            mock_response="Hi!",
+        )  # type: ignore
+
+    async def acompletion(self, *args, **kwargs) -> litellm.ModelResponse:
+        return litellm.completion(
+            model="gpt-3.5-turbo",
+            messages=[{"role": "user", "content": "Hello world"}],
+            mock_response="Hi!",
+        )  # type: ignore
+
+
+my_custom_llm = MyCustomLLM()
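A minimal sketch of exercising this handler directly through the SDK rather than through the proxy, assuming litellm consults custom_provider_map when routing completion() calls (the YAML above does the same wiring declaratively):

import litellm

from custom_handler import my_custom_llm  # module added in this commit

# Register the handler for the "my-custom-llm" provider prefix.
litellm.custom_provider_map = [
    {"provider": "my-custom-llm", "custom_handler": my_custom_llm}
]

resp = litellm.completion(
    model="my-custom-llm/my-model",
    messages=[{"role": "user", "content": "Hello world"}],
)
print(resp.choices[0].message.content)  # "Hi!" (mocked inside MyCustomLLM.completion)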
@ -27,6 +27,7 @@ from litellm._logging import verbose_proxy_logger
|
|||
from litellm.proxy._types import *
|
||||
from litellm.proxy.auth.user_api_key_auth import user_api_key_auth
|
||||
from litellm.proxy.management_endpoints.key_management_endpoints import (
|
||||
_duration_in_seconds,
|
||||
generate_key_helper_fn,
|
||||
)
|
||||
from litellm.proxy.management_helpers.utils import (
|
||||
|
@ -486,6 +487,13 @@ async def user_update(
|
|||
): # models default to [], spend defaults to 0, we should not reset these values
|
||||
non_default_values[k] = v
|
||||
|
||||
if "budget_duration" in non_default_values:
|
||||
duration_s = _duration_in_seconds(
|
||||
duration=non_default_values["budget_duration"]
|
||||
)
|
||||
user_reset_at = datetime.now(timezone.utc) + timedelta(seconds=duration_s)
|
||||
non_default_values["budget_reset_at"] = user_reset_at
|
||||
|
||||
## ADD USER, IF NEW ##
|
||||
verbose_proxy_logger.debug("/user/update: Received data = %s", data)
|
||||
if data.user_id is not None and len(data.user_id) > 0:
|
||||
|
|
|
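The new block converts a shorthand budget_duration into a concrete budget_reset_at timestamp. A self-contained sketch of the computation, using a hypothetical stand-in for _duration_in_seconds since its implementation is not shown in this diff:

from datetime import datetime, timedelta, timezone

def duration_in_seconds(duration: str) -> int:
    # Assumed behavior: parse shorthand like "30s" / "15m" / "12h" / "30d" into seconds.
    units = {"s": 1, "m": 60, "h": 3600, "d": 86400}
    return int(duration[:-1]) * units[duration[-1]]

non_default_values = {"budget_duration": "30d"}
duration_s = duration_in_seconds(non_default_values["budget_duration"])
non_default_values["budget_reset_at"] = datetime.now(timezone.utc) + timedelta(
    seconds=duration_s
)
print(non_default_values["budget_reset_at"])  # about 30 days from now, in UTC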
@ -8,6 +8,12 @@ model_list:
|
|||
litellm_params:
|
||||
model: fireworks_ai/accounts/fireworks/models/llama-v3-70b-instruct
|
||||
api_key: "os.environ/FIREWORKS"
|
||||
- model_name: tts
|
||||
litellm_params:
|
||||
model: openai/tts-1
|
||||
api_key: "os.environ/OPENAI_API_KEY"
|
||||
model_info:
|
||||
mode: audio_speech
|
||||
general_settings:
|
||||
master_key: sk-1234
|
||||
alerting: ["slack"]
|
||||
|
|
|
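With mode: audio_speech the proxy can serve text-to-speech through the OpenAI-compatible /audio/speech route. A usage sketch, assuming the proxy runs locally on its default port and sk-1234 is the master key from this config:

from openai import OpenAI

client = OpenAI(api_key="sk-1234", base_url="http://localhost:4000")

# "tts" is the model_name above; the proxy forwards the call to openai/tts-1.
speech = client.audio.speech.create(
    model="tts",
    voice="alloy",
    input="Hello from the litellm proxy",
)
with open("speech.mp3", "wb") as f:
    f.write(speech.content)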
@ -1507,6 +1507,21 @@ class ProxyConfig:
|
|||
verbose_proxy_logger.debug(
|
||||
f"litellm.post_call_rules: {litellm.post_call_rules}"
|
||||
)
|
||||
elif key == "custom_provider_map":
|
||||
from litellm.utils import custom_llm_setup
|
||||
|
||||
litellm.custom_provider_map = [
|
||||
{
|
||||
"provider": item["provider"],
|
||||
"custom_handler": get_instance_fn(
|
||||
value=item["custom_handler"],
|
||||
config_file_path=config_file_path,
|
||||
),
|
||||
}
|
||||
for item in value
|
||||
]
|
||||
|
||||
custom_llm_setup()
|
||||
elif key == "success_callback":
|
||||
litellm.success_callback = []
|
||||
|
||||
|
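get_instance_fn turns the "module.attribute" string from the YAML into the actual handler object; its implementation is not part of this diff. A hypothetical stand-in that shows the general idea of resolving the module relative to the config file:

import importlib
import os
import sys

def resolve_instance(value: str, config_file_path: str):
    # e.g. value="custom_handler.my_custom_llm": import custom_handler from the
    # directory containing the config file, then fetch the my_custom_llm attribute.
    module_name, _, attr = value.rpartition(".")
    sys.path.insert(0, os.path.dirname(os.path.abspath(config_file_path)))
    module = importlib.import_module(module_name)
    return getattr(module, attr)

# Hypothetical path, for illustration only.
handler = resolve_instance("custom_handler.my_custom_llm", "./proxy_config.yaml")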
@ -3334,6 +3349,7 @@ async def embeddings(
|
|||
if (
|
||||
"input" in data
|
||||
and isinstance(data["input"], list)
|
||||
and len(data["input"]) > 0
|
||||
and isinstance(data["input"][0], list)
|
||||
and isinstance(data["input"][0][0], int)
|
||||
): # check if array of tokens passed in
|
||||
|
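The added length check guards the subsequent data["input"][0] lookups against an empty list. A small illustration of the two request shapes the condition separates:

string_input = {"input": ["hello world"]}                  # list of strings
token_input = {"input": [[9906, 1917], [1521, 374, 264]]}  # list of token-id lists

def is_token_array(data: dict) -> bool:
    return (
        "input" in data
        and isinstance(data["input"], list)
        and len(data["input"]) > 0  # new check: an empty list no longer raises IndexError
        and isinstance(data["input"][0], list)
        and isinstance(data["input"][0][0], int)
    )

print(is_token_array(string_input), is_token_array(token_input))  # False True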
@ -3464,8 +3480,8 @@ async def embeddings(
|
|||
litellm_debug_info,
|
||||
)
|
||||
verbose_proxy_logger.error(
|
||||
"litellm.proxy.proxy_server.embeddings(): Exception occured - {}".format(
|
||||
str(e)
|
||||
"litellm.proxy.proxy_server.embeddings(): Exception occured - {}\n{}".format(
|
||||
str(e), traceback.format_exc()
|
||||
)
|
||||
)
|
||||
verbose_proxy_logger.debug(traceback.format_exc())
|
||||
|
|
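This change folds the stack trace into the error-level log record, so it appears alongside the exception even when debug logging is disabled. A minimal sketch of the same pattern outside the proxy:

import logging
import traceback

logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger("litellm.proxy")

try:
    raise ValueError("embedding backend unavailable")  # placeholder failure
except Exception as e:
    # Include the traceback in the error message itself, mirroring the diff above.
    logger.error(
        "embeddings(): Exception occurred - {}\n{}".format(str(e), traceback.format_exc())
    )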