From bd37a9cb5e3e34aa4550a41c43ab9c28db825f6f Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 23 Jan 2024 11:12:16 -0800 Subject: [PATCH 01/23] (fix) proxy - streaming sagemaker --- litellm/proxy/proxy_server.py | 26 ++++++++++++++++++-------- litellm/proxy/tests/test_openai_js.js | 2 +- 2 files changed, 19 insertions(+), 9 deletions(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 78e756a2a..f4eb04874 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1658,11 +1658,16 @@ async def completion( "stream" in data and data["stream"] == True ): # use generate_responses to stream responses custom_headers = {"x-litellm-model-id": model_id} - return StreamingResponse( - async_data_generator( - user_api_key_dict=user_api_key_dict, + stream_content = async_data_generator( + user_api_key_dict=user_api_key_dict, + response=response, + ) + if response.custom_llm_provider == "sagemaker": + stream_content = data_generator( response=response, - ), + ) + return StreamingResponse( + stream_content, media_type="text/event-stream", headers=custom_headers, ) @@ -1820,11 +1825,16 @@ async def chat_completion( "stream" in data and data["stream"] == True ): # use generate_responses to stream responses custom_headers = {"x-litellm-model-id": model_id} - return StreamingResponse( - async_data_generator( - user_api_key_dict=user_api_key_dict, + stream_content = async_data_generator( + user_api_key_dict=user_api_key_dict, + response=response, + ) + if response.custom_llm_provider == "sagemaker": + stream_content = data_generator( response=response, - ), + ) + return StreamingResponse( + stream_content, media_type="text/event-stream", headers=custom_headers, ) diff --git a/litellm/proxy/tests/test_openai_js.js b/litellm/proxy/tests/test_openai_js.js index 7e74eeca3..c0f25cf05 100644 --- a/litellm/proxy/tests/test_openai_js.js +++ b/litellm/proxy/tests/test_openai_js.js @@ -4,7 +4,7 @@ const openai = require('openai'); process.env.DEBUG=false; async function runOpenAI() { const client = new openai.OpenAI({ - apiKey: 'sk-yPX56TDqBpr23W7ruFG3Yg', + apiKey: 'sk-JkKeNi6WpWDngBsghJ6B9g', baseURL: 'http://0.0.0.0:8000' }); From a61dbc1613c4696a9d6a0d675371a6a7d21a5974 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 23 Jan 2024 12:08:58 -0800 Subject: [PATCH 02/23] (fix) select_data_generator - sagemaker --- litellm/proxy/proxy_server.py | 37 ++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index f4eb04874..af5d6d5ac 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1422,6 +1422,19 @@ async def async_data_generator(response, user_api_key_dict): yield f"data: {str(e)}\n\n" +def select_data_generator(response, user_api_key_dict): + # since boto3 - sagemaker does not support async calls + if response.custom_llm_provider == "sagemaker": + return data_generator( + response=response, + ) + else: + # default to async_data_generator + return async_data_generator( + response=response, user_api_key_dict=user_api_key_dict + ) + + def get_litellm_model_info(model: dict = {}): model_info = model.get("model_info", {}) model_to_lookup = model.get("litellm_params", {}).get("model", None) @@ -1658,16 +1671,12 @@ async def completion( "stream" in data and data["stream"] == True ): # use generate_responses to stream responses custom_headers = {"x-litellm-model-id": model_id} - stream_content = async_data_generator( - 
user_api_key_dict=user_api_key_dict, - response=response, + selected_data_generator = select_data_generator( + response=response, user_api_key_dict=user_api_key_dict ) - if response.custom_llm_provider == "sagemaker": - stream_content = data_generator( - response=response, - ) + return StreamingResponse( - stream_content, + selected_data_generator, media_type="text/event-stream", headers=custom_headers, ) @@ -1825,16 +1834,12 @@ async def chat_completion( "stream" in data and data["stream"] == True ): # use generate_responses to stream responses custom_headers = {"x-litellm-model-id": model_id} - stream_content = async_data_generator( - user_api_key_dict=user_api_key_dict, - response=response, + selected_data_generator = select_data_generator( + response=response, user_api_key_dict=user_api_key_dict ) - if response.custom_llm_provider == "sagemaker": - stream_content = data_generator( - response=response, - ) + return StreamingResponse( - stream_content, + selected_data_generator, media_type="text/event-stream", headers=custom_headers, ) From 44e213e842d0ad7121aa56f39b156de6028686ca Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 23 Jan 2024 12:13:34 -0800 Subject: [PATCH 03/23] (fix) select_data_generator --- litellm/proxy/proxy_server.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index af5d6d5ac..a1790f49c 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -1423,13 +1423,22 @@ async def async_data_generator(response, user_api_key_dict): def select_data_generator(response, user_api_key_dict): - # since boto3 - sagemaker does not support async calls - if response.custom_llm_provider == "sagemaker": - return data_generator( - response=response, - ) - else: - # default to async_data_generator + try: + # since boto3 - sagemaker does not support async calls, we should use a sync data_generator + if ( + hasattr(response, "custom_llm_provider") + and response.custom_llm_provider == "sagemaker" + ): + return data_generator( + response=response, + ) + else: + # default to async_data_generator + return async_data_generator( + response=response, user_api_key_dict=user_api_key_dict + ) + except: + # worst case - use async_data_generator return async_data_generator( response=response, user_api_key_dict=user_api_key_dict ) From e8cd27f2b75277cd8ae2f2ea02014ac6e4e1ecfd Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 23 Jan 2024 12:31:16 -0800 Subject: [PATCH 04/23] (fix) sagemaker streaming support --- litellm/utils.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/litellm/utils.py b/litellm/utils.py index 00b76bfb5..85d160334 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -7732,10 +7732,8 @@ class CustomStreamWrapper: ] self.sent_last_chunk = True elif self.custom_llm_provider == "sagemaker": - print_verbose(f"ENTERS SAGEMAKER STREAMING") - new_chunk = next(self.completion_stream) - - completion_obj["content"] = new_chunk + print_verbose(f"ENTERS SAGEMAKER STREAMING for chunk {chunk}") + completion_obj["content"] = chunk elif self.custom_llm_provider == "petals": if len(self.completion_stream) == 0: if self.sent_last_chunk: @@ -7854,7 +7852,7 @@ class CustomStreamWrapper: completion_obj["role"] = "assistant" self.sent_first_chunk = True model_response.choices[0].delta = Delta(**completion_obj) - print_verbose(f"model_response: {model_response}") + print_verbose(f"returning model_response: {model_response}") return 
model_response else: return From 6d81da3f430b4865be50dbb979a7358978cfac49 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 23 Jan 2024 12:37:19 -0800 Subject: [PATCH 05/23] =?UTF-8?q?bump:=20version=201.18.10=20=E2=86=92=201?= =?UTF-8?q?.18.11?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 00d424e79..2be49d95d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.18.10" +version = "1.18.11" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT" @@ -61,7 +61,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.18.10" +version = "1.18.11" version_files = [ "pyproject.toml:^version" ] From 39b4f19bd8acedc77cae67b881f1dde4149fc9fc Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 23 Jan 2024 12:56:03 -0800 Subject: [PATCH 06/23] (fix) same response_id across chunk --- litellm/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/litellm/utils.py b/litellm/utils.py index 85d160334..a400a899e 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -7041,6 +7041,7 @@ class CustomStreamWrapper: self._hidden_params = { "model_id": (_model_info.get("id", None)) } # returned as x-litellm-model-id response header in proxy + self.response_id = None def __iter__(self): return self @@ -7613,6 +7614,10 @@ class CustomStreamWrapper: def chunk_creator(self, chunk): model_response = ModelResponse(stream=True, model=self.model) + if self.response_id is not None: + model_response.id = self.response_id + else: + self.response_id = model_response.id model_response._hidden_params["custom_llm_provider"] = self.custom_llm_provider model_response.choices = [StreamingChoices()] model_response.choices[0].finish_reason = None From d455833dfb5477818f1269e72fd5a067f61aedaf Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 23 Jan 2024 12:57:04 -0800 Subject: [PATCH 07/23] (test) same response id across chunks --- litellm/tests/test_completion.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index 421580253..b2c69804c 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -1408,9 +1408,15 @@ def test_completion_sagemaker_stream(): ) complete_streaming_response = "" - - for chunk in response: + first_chunk_id, chunk_id = None, None + for i, chunk in enumerate(response): print(chunk) + chunk_id = chunk.id + print(chunk_id) + if i == 0: + first_chunk_id = chunk_id + else: + assert chunk_id == first_chunk_id complete_streaming_response += chunk.choices[0].delta.content or "" # Add any assertions here to check the response # print(response) From 64a387d09bf5e3e6e7e8ec3faa18414a79b33fb0 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 23 Jan 2024 13:06:05 -0800 Subject: [PATCH 08/23] (test) test chunk_ids match across chunks for bedrock --- litellm/tests/test_streaming.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/litellm/tests/test_streaming.py b/litellm/tests/test_streaming.py index 959e63d59..14b1a7210 100644 --- a/litellm/tests/test_streaming.py +++ b/litellm/tests/test_streaming.py @@ -733,8 +733,15 @@ def test_completion_bedrock_claude_stream(): complete_response = "" has_finish_reason = False # Add any assertions here 
to check the response + first_chunk_id = None for idx, chunk in enumerate(response): # print + if idx == 0: + first_chunk_id = chunk.id + else: + assert ( + chunk.id == first_chunk_id + ), f"chunk ids do not match: {chunk.id} != first chunk id{first_chunk_id}" chunk, finished = streaming_format_tests(idx, chunk) has_finish_reason = finished complete_response += chunk From b40176810e93bb03488af4e1c600404402402575 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 23 Jan 2024 13:27:49 -0800 Subject: [PATCH 09/23] (test) dynamic timeouts - router --- litellm/tests/test_router.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/litellm/tests/test_router.py b/litellm/tests/test_router.py index 6c5e8ee7d..d7ab4b880 100644 --- a/litellm/tests/test_router.py +++ b/litellm/tests/test_router.py @@ -960,3 +960,29 @@ def test_router_anthropic_key_dynamic(): messages = [{"role": "user", "content": "Hey, how's it going?"}] router.completion(model="anthropic-claude", messages=messages) os.environ["ANTHROPIC_API_KEY"] = anthropic_api_key + + +def test_router_timeout(): + model_list = [ + { + "model_name": "gpt-3.5-turbo", + "litellm_params": { + "model": "gpt-3.5-turbo", + "api_key": "os.environ/OPENAI_API_KEY", + }, + } + ] + router = Router(model_list=model_list) + messages = [{"role": "user", "content": "Hey, how's it going?"}] + start_time = time.time() + try: + res = router.completion( + model="gpt-3.5-turbo", messages=messages, timeout=0.0001 + ) + print(res) + pytest.fail("this should have timed out") + except litellm.exceptions.Timeout as e: + print("got timeout exception") + print(e) + print(vars(e)) + pass From 2f11b92698a1e7d07abd139d8fe934a022f0640f Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 23 Jan 2024 14:15:19 -0800 Subject: [PATCH 10/23] v0 view spend logs --- ui/admin.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/ui/admin.py b/ui/admin.py index 2d823d85d..80eca7d3c 100644 --- a/ui/admin.py +++ b/ui/admin.py @@ -178,6 +178,42 @@ def list_models(): ) +def usage_stats(): + import streamlit as st + import requests + + # Check if the necessary configuration is available + if ( + st.session_state.get("api_url", None) is not None + and st.session_state.get("proxy_key", None) is not None + ): + # Make the GET request + try: + complete_url = "" + if isinstance(st.session_state["api_url"], str) and st.session_state[ + "api_url" + ].endswith("/"): + complete_url = f"{st.session_state['api_url']}models" + else: + complete_url = f"{st.session_state['api_url']}/models" + response = requests.get( + complete_url, + headers={"Authorization": f"Bearer {st.session_state['proxy_key']}"}, + ) + # Check if the request was successful + if response.status_code == 200: + models = response.json() + st.write(models) # or st.json(models) to pretty print the JSON + else: + st.error(f"Failed to get models. Status code: {response.status_code}") + except Exception as e: + st.error(f"An error occurred while requesting models: {e}") + else: + st.warning( + "Please configure the Proxy Endpoint and Proxy Key on the Proxy Setup page." 
+ ) + + def create_key(): import streamlit as st import json, requests, uuid @@ -338,6 +374,7 @@ def admin_page(is_admin="NOT_GIVEN"): "Add Models", "List Models", "Create Key", + "Usage Stats", "End-User Auth", ), ) @@ -369,6 +406,8 @@ def admin_page(is_admin="NOT_GIVEN"): list_models() elif page == "Create Key": create_key() + elif page == "Usage Stats": + usage_stats() admin_page() From a2da9c30fbf7c0232944d0e3f8d7c7129f6052c7 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 23 Jan 2024 15:10:10 -0800 Subject: [PATCH 11/23] (feat) add /spend/keys endpoint --- litellm/proxy/proxy_server.py | 24 ++++++++++++++++++++++++ litellm/proxy/utils.py | 4 ++++ 2 files changed, 28 insertions(+) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index a1790f49c..a69e37958 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -2305,6 +2305,30 @@ async def info_key_fn( ) +@router.get( + "/spend/keys", + tags=["Budget & Spend Tracking"], + dependencies=[Depends(user_api_key_auth)], +) +async def spend_key_fn(): + global prisma_client + try: + if prisma_client is None: + raise Exception( + f"Database not connected. Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" + ) + + key_info = await prisma_client.get_data(table_name="key", query_type="find_all") + + return key_info + + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={"error": str(e)}, + ) + + #### USER MANAGEMENT #### @router.post( "/user/new", diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index c19137d57..2a5495919 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -391,6 +391,10 @@ class PrismaClient: for r in response: if isinstance(r.expires, datetime): r.expires = r.expires.isoformat() + elif query_type == "find_all": + response = await self.db.litellm_verificationtoken.find_many( + order={"spend": "desc"}, + ) print_verbose(f"PrismaClient: response={response}") if response is not None: return response From 1158ff49952038ecaa7280a0f056294c9890d987 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 23 Jan 2024 15:58:14 -0800 Subject: [PATCH 12/23] (feat) use cli args to start streamlit --- litellm/proxy/admin_ui.py | 4 +- ui/admin.py | 101 ++++++++++++++++++++++++++++---------- 2 files changed, 78 insertions(+), 27 deletions(-) diff --git a/litellm/proxy/admin_ui.py b/litellm/proxy/admin_ui.py index d50d8be90..c72cd88f0 100644 --- a/litellm/proxy/admin_ui.py +++ b/litellm/proxy/admin_ui.py @@ -98,7 +98,7 @@ def list_models(): st.error(f"An error occurred while requesting models: {e}") else: st.warning( - "Please configure the Proxy Endpoint and Proxy Key on the Proxy Setup page." + f"Please configure the Proxy Endpoint and Proxy Key on the Proxy Setup page. Currently set Proxy Endpoint: {st.session_state.get('api_url', None)} and Proxy Key: {st.session_state.get('proxy_key', None)}" ) @@ -151,7 +151,7 @@ def create_key(): raise e else: st.warning( - "Please configure the Proxy Endpoint and Proxy Key on the Proxy Setup page." + f"Please configure the Proxy Endpoint and Proxy Key on the Proxy Setup page. 
Currently set Proxy Endpoint: {st.session_state.get('api_url', None)} and Proxy Key: {st.session_state.get('proxy_key', None)}" ) diff --git a/ui/admin.py b/ui/admin.py index 80eca7d3c..59fa034e1 100644 --- a/ui/admin.py +++ b/ui/admin.py @@ -6,6 +6,9 @@ from dotenv import load_dotenv load_dotenv() import streamlit as st import base64, os, json, uuid, requests +import pandas as pd +import plotly.express as px +import click # Replace your_base_url with the actual URL where the proxy auth app is hosted your_base_url = os.getenv("BASE_URL") # Example base URL @@ -75,7 +78,7 @@ def add_new_model(): and st.session_state.get("proxy_key", None) is None ): st.warning( - "Please configure the Proxy Endpoint and Proxy Key on the Proxy Setup page." + f"Please configure the Proxy Endpoint and Proxy Key on the Proxy Setup page. Currently set Proxy Endpoint: {st.session_state.get('api_url', None)} and Proxy Key: {st.session_state.get('proxy_key', None)}" ) model_name = st.text_input( @@ -174,11 +177,11 @@ def list_models(): st.error(f"An error occurred while requesting models: {e}") else: st.warning( - "Please configure the Proxy Endpoint and Proxy Key on the Proxy Setup page." + f"Please configure the Proxy Endpoint and Proxy Key on the Proxy Setup page. Currently set Proxy Endpoint: {st.session_state.get('api_url', None)} and Proxy Key: {st.session_state.get('proxy_key', None)}" ) -def usage_stats(): +def spend_per_key(): import streamlit as st import requests @@ -193,27 +196,50 @@ def usage_stats(): if isinstance(st.session_state["api_url"], str) and st.session_state[ "api_url" ].endswith("/"): - complete_url = f"{st.session_state['api_url']}models" + complete_url = f"{st.session_state['api_url']}/spend/keys" else: - complete_url = f"{st.session_state['api_url']}/models" + complete_url = f"{st.session_state['api_url']}/spend/keys" response = requests.get( complete_url, headers={"Authorization": f"Bearer {st.session_state['proxy_key']}"}, ) # Check if the request was successful if response.status_code == 200: - models = response.json() - st.write(models) # or st.json(models) to pretty print the JSON + spend_per_key = response.json() + # Create DataFrame + spend_df = pd.DataFrame(spend_per_key) + + # Display the spend per key as a graph + st.write("Spend per Key - Top 10:") + top_10_df = spend_df.nlargest(10, "spend") + fig = px.bar( + top_10_df, + x="token", + y="spend", + title="Top 10 Spend per Key", + height=500, # Adjust the height + width=800, # Adjust the width) + ) + st.plotly_chart(fig) + + # Display the spend per key as a table + st.write("Spend per Key - Full Table:") + st.table(spend_df) + else: st.error(f"Failed to get models. Status code: {response.status_code}") except Exception as e: st.error(f"An error occurred while requesting models: {e}") else: st.warning( - "Please configure the Proxy Endpoint and Proxy Key on the Proxy Setup page." + f"Please configure the Proxy Endpoint and Proxy Key on the Proxy Setup page. Currently set Proxy Endpoint: {st.session_state.get('api_url', None)} and Proxy Key: {st.session_state.get('proxy_key', None)}" ) +def spend_per_user(): + pass + + def create_key(): import streamlit as st import json, requests, uuid @@ -223,7 +249,7 @@ def create_key(): and st.session_state.get("proxy_key", None) is None ): st.warning( - "Please configure the Proxy Endpoint and Proxy Key on the Proxy Setup page." + f"Please configure the Proxy Endpoint and Proxy Key on the Proxy Setup page. 
Currently set Proxy Endpoint: {st.session_state.get('api_url', None)} and Proxy Key: {st.session_state.get('proxy_key', None)}" ) duration = st.text_input("Duration - Can be in (h,m,s)", placeholder="1h") @@ -271,7 +297,7 @@ def update_config(): and st.session_state.get("proxy_key", None) is None ): st.warning( - "Please configure the Proxy Endpoint and Proxy Key on the Proxy Setup page." + f"Please configure the Proxy Endpoint and Proxy Key on the Proxy Setup page. Currently set Proxy Endpoint: {st.session_state.get('api_url', None)} and Proxy Key: {st.session_state.get('proxy_key', None)}" ) st.markdown("#### Alerting") @@ -360,12 +386,16 @@ def update_config(): raise e -def admin_page(is_admin="NOT_GIVEN"): +def admin_page(is_admin="NOT_GIVEN", input_api_url=None, input_proxy_key=None): # Display the form for the admin to set the proxy URL and allowed email subdomain + st.set_page_config( + layout="wide", # Use "wide" layout for more space + ) st.header("Admin Configuration") st.session_state.setdefault("is_admin", is_admin) # Add a navigation sidebar st.sidebar.title("Navigation") + page = st.sidebar.radio( "Go to", ( @@ -374,23 +404,31 @@ def admin_page(is_admin="NOT_GIVEN"): "Add Models", "List Models", "Create Key", - "Usage Stats", + "View Spend Per Key", + "View Spend Per User", "End-User Auth", ), ) # Display different pages based on navigation selection if page == "Connect to Proxy": # Use text inputs with intermediary variables - input_api_url = st.text_input( - "Proxy Endpoint", - value=st.session_state.get("api_url", ""), - placeholder="http://0.0.0.0:8000", - ) - input_proxy_key = st.text_input( - "Proxy Key", - value=st.session_state.get("proxy_key", ""), - placeholder="sk-...", - ) + if input_api_url is None: + input_api_url = st.text_input( + "Proxy Endpoint", + value=st.session_state.get("api_url", ""), + placeholder="http://0.0.0.0:8000", + ) + else: + st.session_state["api_url"] = input_api_url + + if input_proxy_key is None: + input_proxy_key = st.text_input( + "Proxy Key", + value=st.session_state.get("proxy_key", ""), + placeholder="sk-...", + ) + else: + st.session_state["proxy_key"] = input_proxy_key # When the "Save" button is clicked, update the session state if st.button("Save"): st.session_state["api_url"] = input_api_url @@ -406,8 +444,21 @@ def admin_page(is_admin="NOT_GIVEN"): list_models() elif page == "Create Key": create_key() - elif page == "Usage Stats": - usage_stats() + elif page == "View Spend Per Key": + spend_per_key() + elif page == "View Spend Per User": + spend_per_user() -admin_page() +# admin_page() + + +@click.command() +@click.option("--proxy_endpoint", type=str, help="Proxy Endpoint") +@click.option("--proxy_master_key", type=str, help="Proxy Master Key") +def main(proxy_endpoint, proxy_master_key): + admin_page(input_api_url=proxy_endpoint, input_proxy_key=proxy_master_key) + + +if __name__ == "__main__": + main() From f8870fb48e001ba37733f8359103e109e164fdc9 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 23 Jan 2024 15:59:03 -0800 Subject: [PATCH 13/23] fix(utils.py): fix proxy streaming spend tracking --- litellm/proxy/proxy_server.py | 12 +++-- litellm/utils.py | 48 ++++++++++++++++---- tests/test_keys.py | 83 +++++++++++++++++++++++++++++++++- tests/test_openai_endpoints.py | 1 + 4 files changed, 130 insertions(+), 14 deletions(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index 78e756a2a..af6d3fd3a 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -570,7 
+570,7 @@ async def track_cost_callback( litellm_params = kwargs.get("litellm_params", {}) or {} proxy_server_request = litellm_params.get("proxy_server_request") or {} user_id = proxy_server_request.get("body", {}).get("user", None) - if "response_cost" in kwargs: + if kwargs.get("response_cost", None) is not None: response_cost = kwargs["response_cost"] user_api_key = kwargs["litellm_params"]["metadata"].get( "user_api_key", None @@ -596,9 +596,13 @@ async def track_cost_callback( end_time=end_time, ) else: - raise Exception( - f"Model not in litellm model cost map. Add custom pricing - https://docs.litellm.ai/docs/proxy/custom_pricing" - ) + if kwargs["stream"] != True or ( + kwargs["stream"] == True + and kwargs.get("complete_streaming_response") in kwargs + ): + raise Exception( + f"Model not in litellm model cost map. Add custom pricing - https://docs.litellm.ai/docs/proxy/custom_pricing" + ) except Exception as e: verbose_proxy_logger.debug(f"error in tracking cost callback - {str(e)}") diff --git a/litellm/utils.py b/litellm/utils.py index 00b76bfb5..762f94af4 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1067,9 +1067,13 @@ class Logging: ## if model in model cost map - log the response cost ## else set cost to None verbose_logger.debug(f"Model={self.model}; result={result}") - if result is not None and ( - isinstance(result, ModelResponse) - or isinstance(result, EmbeddingResponse) + if ( + result is not None + and ( + isinstance(result, ModelResponse) + or isinstance(result, EmbeddingResponse) + ) + and self.stream != True ): try: self.model_call_details["response_cost"] = litellm.completion_cost( @@ -1104,6 +1108,12 @@ class Logging: self, result=None, start_time=None, end_time=None, cache_hit=None, **kwargs ): verbose_logger.debug(f"Logging Details LiteLLM-Success Call") + start_time, end_time, result = self._success_handler_helper_fn( + start_time=start_time, + end_time=end_time, + result=result, + cache_hit=cache_hit, + ) # print(f"original response in success handler: {self.model_call_details['original_response']}") try: verbose_logger.debug(f"success callbacks: {litellm.success_callback}") @@ -1119,6 +1129,8 @@ class Logging: complete_streaming_response = litellm.stream_chunk_builder( self.sync_streaming_chunks, messages=self.model_call_details.get("messages", None), + start_time=start_time, + end_time=end_time, ) except: complete_streaming_response = None @@ -1132,13 +1144,19 @@ class Logging: self.model_call_details[ "complete_streaming_response" ] = complete_streaming_response + try: + self.model_call_details["response_cost"] = litellm.completion_cost( + completion_response=complete_streaming_response, + ) + verbose_logger.debug( + f"Model={self.model}; cost={self.model_call_details['response_cost']}" + ) + except litellm.NotFoundError as e: + verbose_logger.debug( + f"Model={self.model} not found in completion cost map." 
+ ) + self.model_call_details["response_cost"] = None - start_time, end_time, result = self._success_handler_helper_fn( - start_time=start_time, - end_time=end_time, - result=result, - cache_hit=cache_hit, - ) for callback in litellm.success_callback: try: if callback == "lite_debugger": @@ -1423,6 +1441,18 @@ class Logging: self.model_call_details[ "complete_streaming_response" ] = complete_streaming_response + try: + self.model_call_details["response_cost"] = litellm.completion_cost( + completion_response=complete_streaming_response, + ) + verbose_logger.debug( + f"Model={self.model}; cost={self.model_call_details['response_cost']}" + ) + except litellm.NotFoundError as e: + verbose_logger.debug( + f"Model={self.model} not found in completion cost map." + ) + self.model_call_details["response_cost"] = None for callback in litellm._async_success_callback: try: diff --git a/tests/test_keys.py b/tests/test_keys.py index f209f4c5a..f06b6721e 100644 --- a/tests/test_keys.py +++ b/tests/test_keys.py @@ -4,13 +4,20 @@ import pytest import asyncio import aiohttp +from openai import AsyncOpenAI +import sys, os + +sys.path.insert( + 0, os.path.abspath("../") +) # Adds the parent directory to the system path +import litellm async def generate_key(session, i): url = "http://0.0.0.0:4000/key/generate" headers = {"Authorization": "Bearer sk-1234", "Content-Type": "application/json"} data = { - "models": ["azure-models"], + "models": ["azure-models", "gpt-4"], "aliases": {"mistral-7b": "gpt-3.5-turbo"}, "duration": None, } @@ -82,6 +89,34 @@ async def chat_completion(session, key, model="gpt-4"): if status != 200: raise Exception(f"Request did not return a 200 status code: {status}") + return await response.json() + + +async def chat_completion_streaming(session, key, model="gpt-4"): + client = AsyncOpenAI(api_key=key, base_url="http://0.0.0.0:4000") + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "Hello!"}, + ] + data = { + "model": model, + "messages": messages, + "stream": True, + } + response = await client.chat.completions.create(**data) + + content = "" + async for chunk in response: + content += chunk.choices[0].delta.content or "" + + print(f"content: {content}") + prompt_tokens = litellm.token_counter(model="azure/gpt-35-turbo", messages=messages) + completion_tokens = litellm.token_counter( + model="azure/gpt-35-turbo", text=content, count_response_tokens=True + ) + + return prompt_tokens, completion_tokens + @pytest.mark.asyncio async def test_key_update(): @@ -181,3 +216,49 @@ async def test_key_info(): random_key = key_gen["key"] status = await get_key_info(session=session, get_key=key, call_key=random_key) assert status == 403 + + +@pytest.mark.asyncio +async def test_key_info_spend_values(): + """ + - create key + - make completion call + - assert cost is expected value + """ + async with aiohttp.ClientSession() as session: + ## Test Spend Update ## + # completion + # response = await chat_completion(session=session, key=key) + # prompt_cost, completion_cost = litellm.cost_per_token( + # model="azure/gpt-35-turbo", + # prompt_tokens=response["usage"]["prompt_tokens"], + # completion_tokens=response["usage"]["completion_tokens"], + # ) + # response_cost = prompt_cost + completion_cost + # await asyncio.sleep(5) # allow db log to be updated + # key_info = await get_key_info(session=session, get_key=key, call_key=key) + # print( + # f"response_cost: {response_cost}; key_info spend: {key_info['info']['spend']}" + # ) + # assert 
response_cost == key_info["info"]["spend"] + ## streaming + key_gen = await generate_key(session=session, i=0) + new_key = key_gen["key"] + prompt_tokens, completion_tokens = await chat_completion_streaming( + session=session, key=new_key + ) + print(f"prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}") + prompt_cost, completion_cost = litellm.cost_per_token( + model="azure/gpt-35-turbo", + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + ) + response_cost = prompt_cost + completion_cost + await asyncio.sleep(5) # allow db log to be updated + key_info = await get_key_info( + session=session, get_key=new_key, call_key=new_key + ) + print( + f"response_cost: {response_cost}; key_info spend: {key_info['info']['spend']}" + ) + assert response_cost == key_info["info"]["spend"] diff --git a/tests/test_openai_endpoints.py b/tests/test_openai_endpoints.py index 5a91bffa7..67d7c4db9 100644 --- a/tests/test_openai_endpoints.py +++ b/tests/test_openai_endpoints.py @@ -68,6 +68,7 @@ async def chat_completion(session, key): if status != 200: raise Exception(f"Request did not return a 200 status code: {status}") + return await response.json() @pytest.mark.asyncio From d52f5234b463e0b9a49e6ca2570a99ef3ccb46ad Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 23 Jan 2024 16:14:01 -0800 Subject: [PATCH 14/23] fix(utils.py): fix double hashing issue on spend logs, streaming usage metadata logging iss ue for spend logs --- litellm/proxy/proxy_server.py | 1 + litellm/proxy/utils.py | 2 +- litellm/utils.py | 29 +++++++++++++++++++++-------- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index af6d3fd3a..fa082b49c 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -699,6 +699,7 @@ async def update_database( valid_token.spend = new_spend user_api_key_cache.set_cache(key=token, value=valid_token) + ### UPDATE SPEND LOGS ### async def _insert_spend_log_to_db(): # Helper to generate payload to log verbose_proxy_logger.debug("inserting spend log to db") diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index c19137d57..fb5b523a7 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -834,7 +834,7 @@ def get_logging_payload(kwargs, response_obj, start_time, end_time): usage = response_obj["usage"] id = response_obj.get("id", str(uuid.uuid4())) api_key = metadata.get("user_api_key", "") - if api_key is not None and type(api_key) == str: + if api_key is not None and isinstance(api_key, str) and api_key.startswith("sk-"): # hash the api_key api_key = hash_token(api_key) diff --git a/litellm/utils.py b/litellm/utils.py index 762f94af4..76952e1bf 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1500,14 +1500,27 @@ class Logging: end_time=end_time, ) if callable(callback): # custom logger functions - await customLogger.async_log_event( - kwargs=self.model_call_details, - response_obj=result, - start_time=start_time, - end_time=end_time, - print_verbose=print_verbose, - callback_func=callback, - ) + if self.stream: + if "complete_streaming_response" in self.model_call_details: + await customLogger.async_log_event( + kwargs=self.model_call_details, + response_obj=self.model_call_details[ + "complete_streaming_response" + ], + start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + callback_func=callback, + ) + else: + await customLogger.async_log_event( + kwargs=self.model_call_details, + response_obj=result, + 
start_time=start_time, + end_time=end_time, + print_verbose=print_verbose, + callback_func=callback, + ) if callback == "dynamodb": global dynamoLogger if dynamoLogger is None: From e723df30f35ef3c5931b67a9da908da3667ea193 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 23 Jan 2024 16:14:39 -0800 Subject: [PATCH 15/23] (feat) ui improvements --- ui/admin.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/ui/admin.py b/ui/admin.py index 59fa034e1..674bba7fe 100644 --- a/ui/admin.py +++ b/ui/admin.py @@ -210,15 +210,16 @@ def spend_per_key(): spend_df = pd.DataFrame(spend_per_key) # Display the spend per key as a graph - st.write("Spend per Key - Top 10:") + st.write("Spend ($) per Key:") top_10_df = spend_df.nlargest(10, "spend") fig = px.bar( top_10_df, x="token", y="spend", title="Top 10 Spend per Key", - height=500, # Adjust the height - width=800, # Adjust the width) + height=550, # Adjust the height + width=1200, # Adjust the width) + hover_data=["token", "spend", "user_id", "team_id"], ) st.plotly_chart(fig) @@ -400,12 +401,12 @@ def admin_page(is_admin="NOT_GIVEN", input_api_url=None, input_proxy_key=None): "Go to", ( "Connect to Proxy", - "Update Config", - "Add Models", - "List Models", - "Create Key", "View Spend Per Key", "View Spend Per User", + "List Models", + "Update Config", + "Add Models", + "Create Key", "End-User Auth", ), ) From 7e0adbb9bdfa690534728f95d0403dd7e3d880f5 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 23 Jan 2024 16:19:32 -0800 Subject: [PATCH 16/23] fix(proxy/utils.py): remove original auth sk-.. key before logging to spend logs --- litellm/proxy/utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index fb5b523a7..1c504ca93 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -838,6 +838,11 @@ def get_logging_payload(kwargs, response_obj, start_time, end_time): # hash the api_key api_key = hash_token(api_key) + if "headers" in metadata and "authorization" in metadata["headers"]: + metadata["headers"].pop( + "authorization" + ) # do not store the original `sk-..` api key in the db + payload = { "request_id": id, "call_type": call_type, From 6a7126af9311aaac951b1b12f603ad2f3779ee81 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 23 Jan 2024 16:24:13 -0800 Subject: [PATCH 17/23] (fix) UI --- ui/admin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ui/admin.py b/ui/admin.py index 674bba7fe..8b5c6b3ab 100644 --- a/ui/admin.py +++ b/ui/admin.py @@ -210,7 +210,7 @@ def spend_per_key(): spend_df = pd.DataFrame(spend_per_key) # Display the spend per key as a graph - st.write("Spend ($) per Key:") + st.header("Spend ($) per API Key:") top_10_df = spend_df.nlargest(10, "spend") fig = px.bar( top_10_df, From 8ae8edfdb486af77af3371ab93c50f4ae2429ab2 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 23 Jan 2024 16:27:25 -0800 Subject: [PATCH 18/23] (fix) add doc string for /spend/keys --- litellm/proxy/proxy_server.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index a69e37958..cf2463226 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -2311,6 +2311,15 @@ async def info_key_fn( dependencies=[Depends(user_api_key_auth)], ) async def spend_key_fn(): + """ + View all keys created, ordered by spend + + Example Request: + ``` + curl -X GET "http://0.0.0.0:8000/spend/keys" \ +-H "Authorization: Bearer sk-1234" + 
``` + """ global prisma_client try: if prisma_client is None: From f47db44b4f6dd22ebcfdb9b16402fa8b4f89dd92 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 23 Jan 2024 16:27:45 -0800 Subject: [PATCH 19/23] test(test_keys.py): fix streaming test --- tests/test_keys.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/test_keys.py b/tests/test_keys.py index f06b6721e..a0bf7387d 100644 --- a/tests/test_keys.py +++ b/tests/test_keys.py @@ -98,6 +98,8 @@ async def chat_completion_streaming(session, key, model="gpt-4"): {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello!"}, ] + prompt_tokens = litellm.token_counter(model="gpt-35-turbo", messages=messages) + assert prompt_tokens == 19 data = { "model": model, "messages": messages, @@ -110,7 +112,7 @@ async def chat_completion_streaming(session, key, model="gpt-4"): content += chunk.choices[0].delta.content or "" print(f"content: {content}") - prompt_tokens = litellm.token_counter(model="azure/gpt-35-turbo", messages=messages) + completion_tokens = litellm.token_counter( model="azure/gpt-35-turbo", text=content, count_response_tokens=True ) @@ -249,7 +251,7 @@ async def test_key_info_spend_values(): ) print(f"prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}") prompt_cost, completion_cost = litellm.cost_per_token( - model="azure/gpt-35-turbo", + model="gpt-35-turbo", prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, ) From 7b8353e5c662632cb423277fdd6cbbe8aa9bcf0b Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Tue, 23 Jan 2024 16:52:08 -0800 Subject: [PATCH 20/23] Updated config.yml --- .circleci/config.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index c2433c7ad..b923a73b8 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -114,6 +114,7 @@ jobs: pip install "pytest==7.3.1" pip install "pytest-asyncio==0.21.1" pip install aiohttp + pip install openai # Run pytest and generate JUnit XML report - run: name: Build Docker image From 0e9339b39096adca75a9db7ab24468724753b68a Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Tue, 23 Jan 2024 16:57:51 -0800 Subject: [PATCH 21/23] (feat) /spend/logs --- litellm/proxy/proxy_server.py | 55 +++++++++++++++++++++++++++++++++++ litellm/proxy/utils.py | 20 ++++++++++++- 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py index cf2463226..a23d1b5f1 100644 --- a/litellm/proxy/proxy_server.py +++ b/litellm/proxy/proxy_server.py @@ -2338,6 +2338,61 @@ async def spend_key_fn(): ) +@router.get( + "/spend/logs", + tags=["Budget & Spend Tracking"], + dependencies=[Depends(user_api_key_auth)], +) +async def view_spend_logs( + request_id: Optional[str] = fastapi.Query( + default=None, + description="request_id to get spend logs for specific request_id. If none passed then pass spend logs for all requests", + ), +): + """ + View all spend logs, if request_id is provided, only logs for that request_id will be returned + + Example Request for all logs + ``` + curl -X GET "http://0.0.0.0:8000/spend/logs" \ +-H "Authorization: Bearer sk-1234" + ``` + + Example Request for specific request_id + ``` + curl -X GET "http://0.0.0.0:8000/spend/logs?request_id=chatcmpl-6dcb2540-d3d7-4e49-bb27-291f863f112e" \ +-H "Authorization: Bearer sk-1234" + ``` + """ + global prisma_client + try: + if prisma_client is None: + raise Exception( + f"Database not connected. 
Connect a database to your proxy - https://docs.litellm.ai/docs/simple_proxy#managing-auth---virtual-keys" + ) + spend_logs = [] + if request_id is not None: + spend_log = await prisma_client.get_data( + table_name="spend", + query_type="find_unique", + request_id=request_id, + ) + return [spend_log] + else: + spend_logs = await prisma_client.get_data( + table_name="spend", query_type="find_all" + ) + return spend_logs + + return None + + except Exception as e: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail={"error": str(e)}, + ) + + #### USER MANAGEMENT #### @router.post( "/user/new", diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 2a5495919..aecb6978b 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -361,7 +361,8 @@ class PrismaClient: self, token: Optional[str] = None, user_id: Optional[str] = None, - table_name: Optional[Literal["user", "key", "config"]] = None, + request_id: Optional[str] = None, + table_name: Optional[Literal["user", "key", "config", "spend"]] = None, query_type: Literal["find_unique", "find_all"] = "find_unique", ): try: @@ -411,6 +412,23 @@ class PrismaClient: } ) return response + elif table_name == "spend": + verbose_proxy_logger.debug( + f"PrismaClient: get_data: table_name == 'spend'" + ) + if request_id is not None: + response = await self.db.litellm_spendlogs.find_unique( # type: ignore + where={ + "request_id": request_id, + } + ) + return response + else: + response = await self.db.litellm_spendlogs.find_many( # type: ignore + order={"startTime": "desc"}, + ) + return response + except Exception as e: print_verbose(f"LiteLLM Prisma Client Exception: {e}") import traceback From 5defa93a9d997a80ef5336929d91f85fd6f000b3 Mon Sep 17 00:00:00 2001 From: Krish Dholakia Date: Tue, 23 Jan 2024 16:59:56 -0800 Subject: [PATCH 22/23] Updated config.yml --- .circleci/config.yml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/.circleci/config.yml b/.circleci/config.yml index b923a73b8..c7417a62f 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -115,6 +115,24 @@ jobs: pip install "pytest-asyncio==0.21.1" pip install aiohttp pip install openai + python -m pip install --upgrade pip + python -m pip install -r .circleci/requirements.txt + pip install "pytest==7.3.1" + pip install "pytest-asyncio==0.21.1" + pip install mypy + pip install "google-generativeai>=0.3.2" + pip install "google-cloud-aiplatform>=1.38.0" + pip install "boto3>=1.28.57" + pip install langchain + pip install "langfuse>=2.0.0" + pip install numpydoc + pip install prisma + pip install "httpx==0.24.1" + pip install "gunicorn==21.2.0" + pip install "anyio==3.7.1" + pip install "aiodynamo==23.10.1" + pip install "asyncio==3.4.3" + pip install "PyGithub==1.59.1" # Run pytest and generate JUnit XML report - run: name: Build Docker image From d6844f43c8926ec4d58d4a27d2219cb051133d19 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Tue, 23 Jan 2024 17:46:14 -0800 Subject: [PATCH 23/23] test(test_keys.py): use correct model name for token counting --- litellm/proxy/utils.py | 2 +- litellm/utils.py | 22 ++++++++++++++++++---- tests/test_keys.py | 11 +++++------ 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/litellm/proxy/utils.py b/litellm/proxy/utils.py index 2b34acceb..9aef0304c 100644 --- a/litellm/proxy/utils.py +++ b/litellm/proxy/utils.py @@ -556,7 +556,7 @@ class PrismaClient: where={"token": token}, # type: ignore data={**db_data}, # type: ignore ) - print_verbose( + 
verbose_proxy_logger.debug( "\033[91m" + f"DB Token Table update succeeded {response}" + "\033[0m" diff --git a/litellm/utils.py b/litellm/utils.py index cca8bc85e..7a6b12a82 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -2938,17 +2938,25 @@ def cost_per_token( ) return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar elif model_with_provider in model_cost_ref: - print_verbose(f"Looking up model={model_with_provider} in model_cost_map") + verbose_logger.debug( + f"Looking up model={model_with_provider} in model_cost_map" + ) + verbose_logger.debug( + f"applying cost={model_cost_ref[model_with_provider]['input_cost_per_token']} for prompt_tokens={prompt_tokens}" + ) prompt_tokens_cost_usd_dollar = ( model_cost_ref[model_with_provider]["input_cost_per_token"] * prompt_tokens ) + verbose_logger.debug( + f"applying cost={model_cost_ref[model_with_provider]['output_cost_per_token']} for completion_tokens={completion_tokens}" + ) completion_tokens_cost_usd_dollar = ( model_cost_ref[model_with_provider]["output_cost_per_token"] * completion_tokens ) return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar elif "ft:gpt-3.5-turbo" in model: - print_verbose(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM") + verbose_logger.debug(f"Cost Tracking: {model} is an OpenAI FinteTuned LLM") # fuzzy match ft:gpt-3.5-turbo:abcd-id-cool-litellm prompt_tokens_cost_usd_dollar = ( model_cost_ref["ft:gpt-3.5-turbo"]["input_cost_per_token"] * prompt_tokens @@ -2959,17 +2967,23 @@ def cost_per_token( ) return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar elif model in litellm.azure_llms: - print_verbose(f"Cost Tracking: {model} is an Azure LLM") + verbose_logger.debug(f"Cost Tracking: {model} is an Azure LLM") model = litellm.azure_llms[model] + verbose_logger.debug( + f"applying cost={model_cost_ref[model]['input_cost_per_token']} for prompt_tokens={prompt_tokens}" + ) prompt_tokens_cost_usd_dollar = ( model_cost_ref[model]["input_cost_per_token"] * prompt_tokens ) + verbose_logger.debug( + f"applying cost={model_cost_ref[model]['output_cost_per_token']} for completion_tokens={completion_tokens}" + ) completion_tokens_cost_usd_dollar = ( model_cost_ref[model]["output_cost_per_token"] * completion_tokens ) return prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar elif model in litellm.azure_embedding_models: - print_verbose(f"Cost Tracking: {model} is an Azure Embedding Model") + verbose_logger.debug(f"Cost Tracking: {model} is an Azure Embedding Model") model = litellm.azure_embedding_models[model] prompt_tokens_cost_usd_dollar = ( model_cost_ref[model]["input_cost_per_token"] * prompt_tokens diff --git a/tests/test_keys.py b/tests/test_keys.py index a0bf7387d..917c50823 100644 --- a/tests/test_keys.py +++ b/tests/test_keys.py @@ -2,7 +2,7 @@ ## Tests /key endpoints. import pytest -import asyncio +import asyncio, time import aiohttp from openai import AsyncOpenAI import sys, os @@ -95,11 +95,10 @@ async def chat_completion(session, key, model="gpt-4"): async def chat_completion_streaming(session, key, model="gpt-4"): client = AsyncOpenAI(api_key=key, base_url="http://0.0.0.0:4000") messages = [ - {"role": "system", "content": "You are a helpful assistant."}, - {"role": "user", "content": "Hello!"}, + {"role": "system", "content": "You are a helpful assistant"}, + {"role": "user", "content": f"Hello! 
{time.time()}"}, ] prompt_tokens = litellm.token_counter(model="gpt-35-turbo", messages=messages) - assert prompt_tokens == 19 data = { "model": model, "messages": messages, @@ -114,7 +113,7 @@ async def chat_completion_streaming(session, key, model="gpt-4"): print(f"content: {content}") completion_tokens = litellm.token_counter( - model="azure/gpt-35-turbo", text=content, count_response_tokens=True + model="gpt-35-turbo", text=content, count_response_tokens=True ) return prompt_tokens, completion_tokens @@ -251,7 +250,7 @@ async def test_key_info_spend_values(): ) print(f"prompt_tokens: {prompt_tokens}, completion_tokens: {completion_tokens}") prompt_cost, completion_cost = litellm.cost_per_token( - model="gpt-35-turbo", + model="azure/gpt-35-turbo", prompt_tokens=prompt_tokens, completion_tokens=completion_tokens, )