From ce426f8b07f133bd3b817a41c13aeeef509d946b Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 11:44:20 +0530 Subject: [PATCH 01/19] (fix) s3 log cache hits --- litellm/integrations/s3.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/litellm/integrations/s3.py b/litellm/integrations/s3.py index e7f607b41..db40ae832 100644 --- a/litellm/integrations/s3.py +++ b/litellm/integrations/s3.py @@ -118,7 +118,10 @@ class S3Logger: except: # non blocking if it can't cast to a str pass - s3_object_key = payload["id"] + + s3_object_key = ( + payload["id"] + "-time=" + str(start_time) + ) # we need the s3 key to include the time, so we log cache hits too import json From cc78e003bf9cf8f1677694c070c6f4119aabe067 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 11:44:48 +0530 Subject: [PATCH 02/19] (test) s3 log cache hits --- litellm/tests/test_s3_logs.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/litellm/tests/test_s3_logs.py b/litellm/tests/test_s3_logs.py index 2a919d127..ee040928b 100644 --- a/litellm/tests/test_s3_logs.py +++ b/litellm/tests/test_s3_logs.py @@ -20,8 +20,10 @@ def test_s3_logging(): # since we are modifying stdout, and pytests runs tests in parallel # on circle ci - we only test litellm.acompletion() try: - # pre # redirect stdout to log_file + litellm.cache = litellm.Cache( + type="s3", s3_bucket_name="cache-bucket-litellm", s3_region_name="us-west-2" + ) litellm.success_callback = ["s3"] litellm.s3_callback_params = { @@ -35,10 +37,14 @@ def test_s3_logging(): expected_keys = [] + import time + + curr_time = str(time.time()) + async def _test(): return await litellm.acompletion( model="gpt-3.5-turbo", - messages=[{"role": "user", "content": "This is a test"}], + messages=[{"role": "user", "content": f"This is a test {curr_time}"}], max_tokens=10, temperature=0.7, user="ishaan-2", @@ -48,6 +54,19 @@ def test_s3_logging(): print(f"response: {response}") expected_keys.append(response.id) + async def _test(): + return await litellm.acompletion( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": f"This is a test {curr_time}"}], + max_tokens=10, + temperature=0.7, + user="ishaan-2", + ) + + response = asyncio.run(_test()) + expected_keys.append(response.id) + print(f"response: {response}") + # # streaming + async # async def _test2(): # response = await litellm.acompletion( @@ -86,10 +105,17 @@ def test_s3_logging(): ) # Get the keys of the most recent objects most_recent_keys = [obj["Key"] for obj in objects] + print(most_recent_keys) + # for each key, get the part before "-" as the key. 
Do it safely + cleaned_keys = [] + for key in most_recent_keys: + split_key = key.split("-time=") + cleaned_keys.append(split_key[0]) print("\n most recent keys", most_recent_keys) + print("\n cleaned keys", cleaned_keys) print("\n Expected keys: ", expected_keys) for key in expected_keys: - assert key in most_recent_keys + assert key in cleaned_keys except Exception as e: pytest.fail(f"An exception occurred - {e}") finally: From 40c740089474b74baf8bdb68a31e4aa3dc00753a Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 11 Jan 2024 12:51:29 +0530 Subject: [PATCH 03/19] fix(router.py): bump httpx pool limits --- litellm/router.py | 54 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/litellm/router.py b/litellm/router.py index f63555509..d5b42343c 100644 --- a/litellm/router.py +++ b/litellm/router.py @@ -1363,6 +1363,12 @@ class Router: api_version=api_version, timeout=timeout, max_retries=max_retries, + http_client=httpx.AsyncClient( + transport=AsyncCustomHTTPTransport(), + limits=httpx.Limits( + max_connections=1000, max_keepalive_connections=100 + ), + ), # type: ignore ) self.cache.set_cache( key=cache_key, @@ -1378,6 +1384,12 @@ class Router: api_version=api_version, timeout=timeout, max_retries=max_retries, + http_client=httpx.Client( + transport=CustomHTTPTransport(), + limits=httpx.Limits( + max_connections=1000, max_keepalive_connections=100 + ), + ), # type: ignore ) self.cache.set_cache( key=cache_key, @@ -1393,6 +1405,12 @@ class Router: api_version=api_version, timeout=stream_timeout, max_retries=max_retries, + http_client=httpx.AsyncClient( + transport=AsyncCustomHTTPTransport(), + limits=httpx.Limits( + max_connections=1000, max_keepalive_connections=100 + ), + ), # type: ignore ) self.cache.set_cache( key=cache_key, @@ -1408,6 +1426,12 @@ class Router: api_version=api_version, timeout=stream_timeout, max_retries=max_retries, + http_client=httpx.Client( + transport=CustomHTTPTransport(), + limits=httpx.Limits( + max_connections=1000, max_keepalive_connections=100 + ), + ), # type: ignore ) self.cache.set_cache( key=cache_key, @@ -1471,9 +1495,10 @@ class Router: timeout=stream_timeout, max_retries=max_retries, http_client=httpx.AsyncClient( + transport=AsyncCustomHTTPTransport(), limits=httpx.Limits( max_connections=1000, max_keepalive_connections=100 - ) + ), ), ) self.cache.set_cache( @@ -1491,9 +1516,10 @@ class Router: timeout=stream_timeout, max_retries=max_retries, http_client=httpx.Client( + transport=CustomHTTPTransport(), limits=httpx.Limits( max_connections=1000, max_keepalive_connections=100 - ) + ), ), ) self.cache.set_cache( @@ -1513,6 +1539,12 @@ class Router: base_url=api_base, timeout=timeout, max_retries=max_retries, + http_client=httpx.AsyncClient( + transport=AsyncCustomHTTPTransport(), + limits=httpx.Limits( + max_connections=1000, max_keepalive_connections=100 + ), + ), # type: ignore ) self.cache.set_cache( key=cache_key, @@ -1527,6 +1559,12 @@ class Router: base_url=api_base, timeout=timeout, max_retries=max_retries, + http_client=httpx.Client( + transport=CustomHTTPTransport(), + limits=httpx.Limits( + max_connections=1000, max_keepalive_connections=100 + ), + ), # type: ignore ) self.cache.set_cache( key=cache_key, @@ -1542,6 +1580,12 @@ class Router: base_url=api_base, timeout=stream_timeout, max_retries=max_retries, + http_client=httpx.AsyncClient( + transport=AsyncCustomHTTPTransport(), + limits=httpx.Limits( + max_connections=1000, max_keepalive_connections=100 + ), + ), # type: 
ignore ) self.cache.set_cache( key=cache_key, @@ -1557,6 +1601,12 @@ class Router: base_url=api_base, timeout=stream_timeout, max_retries=max_retries, + http_client=httpx.Client( + transport=CustomHTTPTransport(), + limits=httpx.Limits( + max_connections=1000, max_keepalive_connections=100 + ), + ), # type: ignore ) self.cache.set_cache( key=cache_key, From f89385eed8dbf63242aacbd5ed936d4dbe84d1b7 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 14:22:37 +0530 Subject: [PATCH 04/19] (fix) acompletion kwargs type hints --- litellm/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/litellm/main.py b/litellm/main.py index 2b53c3a5f..8342ab4d5 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -202,6 +202,7 @@ async def acompletion( - If `stream` is True, the function returns an async generator that yields completion lines. """ loop = asyncio.get_event_loop() + custom_llm_provider = None # Adjusted to use explicit arguments instead of *args and **kwargs completion_kwargs = { "model": model, @@ -241,7 +242,7 @@ async def acompletion( func_with_context = partial(ctx.run, func) _, custom_llm_provider, _, _ = get_llm_provider( - model=model, api_base=completion_kwargs.get("base_url", None) + model=model, api_base=kwargs.get("api_base", None) ) if ( From 4a1541c4859ce4eac0ec3ca0686f5d6db667f6bd Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 14:39:08 +0530 Subject: [PATCH 05/19] (fix) retry gemini-pro-vision 3 times --- litellm/tests/test_google_ai_studio_gemini.py | 23 +++++++++++-------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/litellm/tests/test_google_ai_studio_gemini.py b/litellm/tests/test_google_ai_studio_gemini.py index 7cebd2537..5012717d3 100644 --- a/litellm/tests/test_google_ai_studio_gemini.py +++ b/litellm/tests/test_google_ai_studio_gemini.py @@ -6,29 +6,34 @@ sys.path.insert( import litellm from dotenv import load_dotenv + def generate_text(): try: + litellm.set_verbose = True messages = [ { "role": "user", "content": [ - { - "type": "text", - "text": "What is this image?" 
- }, + {"type": "text", "text": "What is this image?"}, { "type": "image_url", "image_url": { "url": "https://avatars.githubusercontent.com/u/17561003?v=4" - } - } - ] + }, + }, + ], } ] - response = litellm.completion(model="gemini/gemini-pro-vision", messages=messages, stop="Hello world") + response = litellm.completion( + model="gemini/gemini-pro-vision", + messages=messages, + stop="Hello world", + num_retries=3, + ) print(response) assert isinstance(response.choices[0].message.content, str) == True except Exception as exception: raise Exception("An error occurred during text generation:", exception) -generate_text() + +# generate_text() From c46a3709192e4b9e364df3c06a1e0cd6f6b4243f Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 15:37:03 +0530 Subject: [PATCH 06/19] (docs) logging proxy input / output --- docs/my-website/docs/proxy/logging.md | 236 +++++++------------------- 1 file changed, 61 insertions(+), 175 deletions(-) diff --git a/docs/my-website/docs/proxy/logging.md b/docs/my-website/docs/proxy/logging.md index 5aa78f73d..8bf0fcee2 100644 --- a/docs/my-website/docs/proxy/logging.md +++ b/docs/my-website/docs/proxy/logging.md @@ -7,10 +7,17 @@ import TabItem from '@theme/TabItem'; Log Proxy Input, Output, Exceptions using Custom Callbacks, Langfuse, OpenTelemetry, LangFuse, DynamoDB, s3 Bucket +- [Async Custom Callbacks](#custom-callback-class-async) +- [Logging to Langfuse](#logging-proxy-inputoutput---langfuse) +- [Logging to s3 Buckets](#logging-proxy-inputoutput---s3-buckets) +- [Logging to DynamoDB](#logging-proxy-inputoutput---dynamodb) +- [Logging to Sentry](#logging-proxy-inputoutput---sentry) +- [Logging to Traceloop (OpenTelemetry)](#opentelemetry---traceloop) + ## Custom Callback Class [Async] Use this when you want to run custom callbacks in `python` -### Step 1 - Create your custom `litellm` callback class +#### Step 1 - Create your custom `litellm` callback class We use `litellm.integrations.custom_logger` for this, **more details about litellm custom callbacks [here](https://docs.litellm.ai/docs/observability/custom_callback)** Define your custom callback class in a python file. @@ -112,7 +119,7 @@ proxy_handler_instance = MyCustomHandler() # need to set litellm.callbacks = [proxy_handler_instance] # on the proxy ``` -### Step 2 - Pass your custom callback class in `config.yaml` +#### Step 2 - Pass your custom callback class in `config.yaml` We pass the custom callback class defined in **Step1** to the config.yaml. Set `callbacks` to `python_filename.logger_instance_name` @@ -134,7 +141,7 @@ litellm_settings: ``` -### Step 3 - Start proxy + test request +#### Step 3 - Start proxy + test request ```shell litellm --config proxy_config.yaml ``` @@ -167,7 +174,7 @@ On Success Proxy Metadata: {'user_api_key': None, 'headers': Headers({'host': '0.0.0.0:8000', 'user-agent': 'curl/7.88.1', 'accept': '*/*', 'authorization': 'Bearer sk-1234', 'content-length': '199', 'content-type': 'application/x-www-form-urlencoded'}), 'model_group': 'gpt-3.5-turbo', 'deployment': 'gpt-3.5-turbo-ModelID-gpt-3.5-turbo'} ``` -### Logging Proxy Request Object, Header, Url +#### Logging Proxy Request Object, Header, Url Here's how you can access the `url`, `headers`, `request body` sent to the proxy for each request @@ -211,7 +218,7 @@ class MyCustomHandler(CustomLogger): ``` -### Logging `model_info` set in config.yaml +#### Logging `model_info` set in config.yaml Here is how to log the `model_info` set in your proxy `config.yaml`. 
Information on setting `model_info` on [config.yaml](https://docs.litellm.ai/docs/proxy/configs) @@ -428,176 +435,6 @@ print(response) - -## OpenTelemetry - Traceloop - -Traceloop allows you to log LLM Input/Output in the OpenTelemetry format - -We will use the `--config` to set `litellm.success_callback = ["traceloop"]` this will log all successfull LLM calls to traceloop - -**Step 1** Install traceloop-sdk and set Traceloop API key - -```shell -pip install traceloop-sdk -U -``` - -Traceloop outputs standard OpenTelemetry data that can be connected to your observability stack. Send standard OpenTelemetry from LiteLLM Proxy to [Traceloop](https://www.traceloop.com/docs/openllmetry/integrations/traceloop), [Dynatrace](https://www.traceloop.com/docs/openllmetry/integrations/dynatrace), [Datadog](https://www.traceloop.com/docs/openllmetry/integrations/datadog) -, [New Relic](https://www.traceloop.com/docs/openllmetry/integrations/newrelic), [Honeycomb](https://www.traceloop.com/docs/openllmetry/integrations/honeycomb), [Grafana Tempo](https://www.traceloop.com/docs/openllmetry/integrations/grafana), [Splunk](https://www.traceloop.com/docs/openllmetry/integrations/splunk), [OpenTelemetry Collector](https://www.traceloop.com/docs/openllmetry/integrations/otel-collector) - -**Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback` -```yaml -model_list: - - model_name: gpt-3.5-turbo - litellm_params: - model: gpt-3.5-turbo -litellm_settings: - success_callback: ["traceloop"] -``` - -**Step 3**: Start the proxy, make a test request - -Start proxy -```shell -litellm --config config.yaml --debug -``` - -Test Request -``` -curl --location 'http://0.0.0.0:8000/chat/completions' \ - --header 'Content-Type: application/json' \ - --data ' { - "model": "gpt-3.5-turbo", - "messages": [ - { - "role": "user", - "content": "what llm are you" - } - ] - }' -``` - - - - - - ## Logging Proxy Input/Output - s3 Buckets We will use the `--config` to set @@ -815,3 +652,52 @@ Test Request ``` litellm --test ``` + +## Logging Proxy Input/Output Traceloop (OpenTelemetry) + +Traceloop allows you to log LLM Input/Output in the OpenTelemetry format + +We will use the `--config` to set `litellm.success_callback = ["traceloop"]` this will log all successfull LLM calls to traceloop + +**Step 1** Install traceloop-sdk and set Traceloop API key + +```shell +pip install traceloop-sdk -U +``` + +Traceloop outputs standard OpenTelemetry data that can be connected to your observability stack. 
Send standard OpenTelemetry from LiteLLM Proxy to [Traceloop](https://www.traceloop.com/docs/openllmetry/integrations/traceloop), [Dynatrace](https://www.traceloop.com/docs/openllmetry/integrations/dynatrace), [Datadog](https://www.traceloop.com/docs/openllmetry/integrations/datadog) +, [New Relic](https://www.traceloop.com/docs/openllmetry/integrations/newrelic), [Honeycomb](https://www.traceloop.com/docs/openllmetry/integrations/honeycomb), [Grafana Tempo](https://www.traceloop.com/docs/openllmetry/integrations/grafana), [Splunk](https://www.traceloop.com/docs/openllmetry/integrations/splunk), [OpenTelemetry Collector](https://www.traceloop.com/docs/openllmetry/integrations/otel-collector) + +**Step 2**: Create a `config.yaml` file and set `litellm_settings`: `success_callback` +```yaml +model_list: + - model_name: gpt-3.5-turbo + litellm_params: + model: gpt-3.5-turbo +litellm_settings: + success_callback: ["traceloop"] +``` + +**Step 3**: Start the proxy, make a test request + +Start proxy +```shell +litellm --config config.yaml --debug +``` + +Test Request +``` +curl --location 'http://0.0.0.0:8000/chat/completions' \ + --header 'Content-Type: application/json' \ + --data ' { + "model": "gpt-3.5-turbo", + "messages": [ + { + "role": "user", + "content": "what llm are you" + } + ] + }' +``` + + From 1d9dad4af4a16535bb0b0aa242e79c9e6232b2d6 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 15:57:54 +0530 Subject: [PATCH 07/19] (feat) s3 logging - log cache hits --- litellm/integrations/s3.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/litellm/integrations/s3.py b/litellm/integrations/s3.py index db40ae832..0187d13d6 100644 --- a/litellm/integrations/s3.py +++ b/litellm/integrations/s3.py @@ -93,6 +93,7 @@ class S3Logger: messages = kwargs.get("messages") optional_params = kwargs.get("optional_params", {}) call_type = kwargs.get("call_type", "litellm.completion") + cache_hit = kwargs.get("cache_hit", False) usage = response_obj["usage"] id = response_obj.get("id", str(uuid.uuid4())) @@ -100,6 +101,7 @@ class S3Logger: payload = { "id": id, "call_type": call_type, + "cache_hit": cache_hit, "startTime": start_time, "endTime": end_time, "model": kwargs.get("model", ""), From bb8eac0597fb20f3d370678ab188f4f5b98cdb54 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 16:57:51 +0530 Subject: [PATCH 08/19] (test) improve s3 logging test --- litellm/tests/test_s3_logs.py | 41 ++++++++++++++--------------------- 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/litellm/tests/test_s3_logs.py b/litellm/tests/test_s3_logs.py index ee040928b..a5347fb08 100644 --- a/litellm/tests/test_s3_logs.py +++ b/litellm/tests/test_s3_logs.py @@ -67,31 +67,6 @@ def test_s3_logging(): expected_keys.append(response.id) print(f"response: {response}") - # # streaming + async - # async def _test2(): - # response = await litellm.acompletion( - # model="gpt-3.5-turbo", - # messages=[{"role": "user", "content": "what llm are u"}], - # max_tokens=10, - # temperature=0.7, - # user="ishaan-2", - # stream=True, - # ) - # async for chunk in response: - # pass - - # asyncio.run(_test2()) - - # aembedding() - # async def _test3(): - # return await litellm.aembedding( - # model="text-embedding-ada-002", input=["hi"], user="ishaan-2" - # ) - - # response = asyncio.run(_test3()) - # expected_keys.append(response.id) - # time.sleep(1) - import boto3 s3 = boto3.client("s3") @@ -114,8 +89,24 @@ def test_s3_logging(): print("\n most recent keys", most_recent_keys) print("\n cleaned 
keys", cleaned_keys) print("\n Expected keys: ", expected_keys) + matches = 0 for key in expected_keys: assert key in cleaned_keys + + if key in cleaned_keys: + matches += 1 + # remove the match key + cleaned_keys.remove(key) + # this asserts we log, the first request + the 2nd cached request + print("we had two matches ! passed ", matches) + assert matches == 2 + try: + # cleanup s3 bucket in test + for key in most_recent_keys: + s3.delete_object(Bucket=bucket_name, Key=key) + except: + # don't let cleanup fail a test + pass except Exception as e: pytest.fail(f"An exception occurred - {e}") finally: From 1e80c1fd005acdf8af74826f0e25c6eef0c677d8 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 17:17:16 +0530 Subject: [PATCH 09/19] =?UTF-8?q?bump:=20version=201.17.0=20=E2=86=92=201.?= =?UTF-8?q?17.1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index af738d509..ab7b630db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "litellm" -version = "1.17.0" +version = "1.17.1" description = "Library to easily interface with LLM API providers" authors = ["BerriAI"] license = "MIT License" @@ -60,7 +60,7 @@ requires = ["poetry-core", "wheel"] build-backend = "poetry.core.masonry.api" [tool.commitizen] -version = "1.17.0" +version = "1.17.1" version_files = [ "pyproject.toml:^version" ] From f297a4d174f824ced9a1de5c4348e554d2399c11 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 17:56:27 +0530 Subject: [PATCH 10/19] (feat) show args passed to litellm.completion, acompletion on call --- litellm/utils.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/litellm/utils.py b/litellm/utils.py index fcf6e9dea..8f97ace43 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -1975,6 +1975,8 @@ def client(original_function): @wraps(original_function) def wrapper(*args, **kwargs): + # Prints Exactly what was passed to litellm function - don't execute any logic here - it should just print + print_args_passed_to_litellm(original_function, args, kwargs) start_time = datetime.datetime.now() result = None logging_obj = kwargs.get("litellm_logging_obj", None) @@ -2170,6 +2172,7 @@ def client(original_function): @wraps(original_function) async def wrapper_async(*args, **kwargs): + print_args_passed_to_litellm(original_function, args, kwargs) start_time = datetime.datetime.now() result = None logging_obj = kwargs.get("litellm_logging_obj", None) @@ -8275,3 +8278,29 @@ def transform_logprobs(hf_response): transformed_logprobs = token_info return transformed_logprobs + + +def print_args_passed_to_litellm(original_function, args, kwargs): + try: + args_str = ", ".join(map(repr, args)) + kwargs_str = ", ".join(f"{key}={repr(value)}" for key, value in kwargs.items()) + print_verbose("\n") # new line before + print_verbose("\033[92mRequest to litellm:\033[0m") + if args and kwargs: + print_verbose( + f"\033[92mlitellm.{original_function.__name__}({args_str}, {kwargs_str})\033[0m" + ) + elif args: + print_verbose( + f"\033[92mlitellm.{original_function.__name__}({args_str})\033[0m" + ) + elif kwargs: + print_verbose( + f"\033[92mlitellm.{original_function.__name__}({kwargs_str})\033[0m" + ) + else: + print_verbose(f"\033[92mlitellm.{original_function.__name__}()\033[0m") + print_verbose("\n") # new line after + except: + # This should always be non blocking + 
pass From 1fb3547e48f16d95fc52e45dbab87cbb3b959a23 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 18:13:08 +0530 Subject: [PATCH 11/19] (feat) improve litellm verbose logs --- litellm/utils.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/litellm/utils.py b/litellm/utils.py index 8f97ace43..8af329195 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -8282,6 +8282,26 @@ def transform_logprobs(hf_response): def print_args_passed_to_litellm(original_function, args, kwargs): try: + # we've already printed this for acompletion, don't print for completion + if ( + "acompletion" in kwargs + and kwargs["acompletion"] == True + and original_function.__name__ == "completion" + ): + return + elif ( + "aembedding" in kwargs + and kwargs["aembedding"] == True + and original_function.__name__ == "embedding" + ): + return + elif ( + "aimg_generation" in kwargs + and kwargs["aimg_generation"] == True + and original_function.__name__ == "img_generation" + ): + return + args_str = ", ".join(map(repr, args)) kwargs_str = ", ".join(f"{key}={repr(value)}" for key, value in kwargs.items()) print_verbose("\n") # new line before From 43533812a7a3872b79b1183d2db259441b6494a7 Mon Sep 17 00:00:00 2001 From: Krrish Dholakia Date: Thu, 11 Jan 2024 19:19:29 +0530 Subject: [PATCH 12/19] fix(proxy_cli.py): read db url from config, not just environment --- litellm/main.py | 4 ++-- litellm/proxy/proxy_cli.py | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 34 insertions(+), 3 deletions(-) diff --git a/litellm/main.py b/litellm/main.py index 8342ab4d5..70264b312 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -268,10 +268,10 @@ async def acompletion( elif asyncio.iscoroutine(init_response): response = await init_response else: - response = init_response + response = init_response # type: ignore else: # Call the synchronous function using run_in_executor - response = await loop.run_in_executor(None, func_with_context) + response = await loop.run_in_executor(None, func_with_context) # type: ignore # if kwargs.get("stream", False): # return an async generator # return _async_streaming( # response=response, diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py index ade65ec96..c06ba7d32 100644 --- a/litellm/proxy/proxy_cli.py +++ b/litellm/proxy/proxy_cli.py @@ -6,7 +6,6 @@ from datetime import datetime import importlib from dotenv import load_dotenv - sys.path.append(os.getcwd()) config_filename = "litellm.secrets" @@ -349,6 +348,38 @@ def run_server( raise ImportError( "Uvicorn, gunicorn needs to be imported. Run - `pip 'litellm[proxy]'`" ) + + if config is not None: + """ + Allow user to pass in db url via config + + read from there and save it to os.env['DATABASE_URL'] + """ + try: + import yaml + except: + raise ImportError( + "yaml needs to be imported. 
Run - `pip install 'litellm[proxy]'`" + ) + + if os.path.exists(config): + with open(config, "r") as config_file: + config = yaml.safe_load(config_file) + general_settings = config.get("general_settings", {}) + database_url = general_settings.get("database_url", None) + if database_url and database_url.startswith("os.environ/"): + original_dir = os.getcwd() + # set the working directory to where this script is + sys.path.insert( + 0, os.path.abspath("../..") + ) # Adds the parent directory to the system path - for litellm local dev + import litellm + + database_url = litellm.get_secret(database_url) + os.chdir(original_dir) + if database_url is not None and isinstance(database_url, str): + os.environ["DATABASE_URL"] = database_url + if os.getenv("DATABASE_URL", None) is not None: # run prisma db push, before starting server # Save the current working directory From d14099f9b48ab6fbde1f67b2914b5cf4aeb4979d Mon Sep 17 00:00:00 2001 From: David Leen Date: Thu, 11 Jan 2024 16:20:50 +0100 Subject: [PATCH 13/19] Add explicit dependency on requests library --- poetry.lock | 4 ++-- pyproject.toml | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index 24673701a..496815f9a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. [[package]] name = "aiohttp" @@ -2689,4 +2689,4 @@ proxy = ["backoff", "fastapi", "gunicorn", "orjson", "pyyaml", "rq", "uvicorn"] [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.9.7 || >3.9.7" -content-hash = "b49d09f51e8a57cdf883ab03cd9fecaf1ad007c3092d53347e30129e25adceab" +content-hash = "f4d60cb3f552af0d2a4e4ef5c6f55696fd6e546b75ff7b4ec362c3549a63c92a" diff --git a/pyproject.toml b/pyproject.toml index ab7b630db..08b66e27e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ tokenizers = "*" click = "*" jinja2 = "^3.1.2" aiohttp = "*" +requests = "^2.31.0" uvicorn = {version = "^0.22.0", optional = true} gunicorn = {version = "^21.2.0", optional = true} From 6b87c13b9d5c904c9c66d9fce87fbd835b566ab0 Mon Sep 17 00:00:00 2001 From: David Leen Date: Thu, 11 Jan 2024 16:22:26 +0100 Subject: [PATCH 14/19] (fix) create httpx.Request instead of httpx.request fixes #1420 --- litellm/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/litellm/utils.py b/litellm/utils.py index fcf6e9dea..420147629 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -2844,7 +2844,7 @@ def cost_per_token(model="", prompt_tokens=0, completion_tokens=0): response=httpx.Response( status_code=404, content=error_str, - request=httpx.request(method="cost_per_token", url="https://github.com/BerriAI/litellm"), # type: ignore + request=httpx.Request(method="cost_per_token", url="https://github.com/BerriAI/litellm"), # type: ignore ), llm_provider="", ) @@ -4171,7 +4171,7 @@ def get_llm_provider( response=httpx.Response( status_code=400, content=error_str, - request=httpx.request(method="completion", url="https://github.com/BerriAI/litellm"), # type: ignore + request=httpx.Request(method="completion", url="https://github.com/BerriAI/litellm"), # type: ignore ), llm_provider="", ) @@ -4186,7 +4186,7 @@ def get_llm_provider( response=httpx.Response( status_code=400, content=error_str, - request=httpx.request(method="completion", url="https://github.com/BerriAI/litellm"), # type: ignore + 
request=httpx.Request(method="completion", url="https://github.com/BerriAI/litellm"), # type: ignore ), llm_provider="", ) From a876748bf57f045061e547c965e4a32c983502ad Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 22:56:18 +0530 Subject: [PATCH 15/19] v0 --- litellm/llms/bedrock.py | 4 ++-- litellm/tests/test_embedding.py | 7 ++----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py index 99550280a..dd9c86200 100644 --- a/litellm/llms/bedrock.py +++ b/litellm/llms/bedrock.py @@ -2,7 +2,7 @@ import json, copy, types import os from enum import Enum import time -from typing import Callable, Optional, Any +from typing import Callable, Optional, Any, Union import litellm from litellm.utils import ModelResponse, get_secret, Usage from .prompt_templates.factory import prompt_factory, custom_prompt @@ -714,7 +714,7 @@ def _embedding_func_single( def embedding( model: str, - input: list, + input: Union[list, str], api_key: Optional[str] = None, logging_obj=None, model_response=None, diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py index 954a53e2a..50dd6ee06 100644 --- a/litellm/tests/test_embedding.py +++ b/litellm/tests/test_embedding.py @@ -189,10 +189,7 @@ def test_bedrock_embedding_titan(): litellm.set_verbose = True response = embedding( model="amazon.titan-embed-text-v1", - input=[ - "good morning from litellm, attempting to embed data", - "lets test a second string for good measure", - ], + input="good morning from litellm, attempting to embed data", ) print(f"response:", response) assert isinstance( @@ -206,7 +203,7 @@ def test_bedrock_embedding_titan(): pytest.fail(f"Error occurred: {e}") -# test_bedrock_embedding_titan() +test_bedrock_embedding_titan() def test_bedrock_embedding_cohere(): From a9d812eb8da1bdbc31f7082bc470df56a76d5142 Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 23:02:12 +0530 Subject: [PATCH 16/19] (fix) bedrock - embedding - support str input --- litellm/llms/bedrock.py | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/litellm/llms/bedrock.py b/litellm/llms/bedrock.py index dd9c86200..617964a74 100644 --- a/litellm/llms/bedrock.py +++ b/litellm/llms/bedrock.py @@ -737,18 +737,28 @@ def embedding( aws_region_name=aws_region_name, aws_bedrock_runtime_endpoint=aws_bedrock_runtime_endpoint, ) - - ## Embedding Call - embeddings = [ - _embedding_func_single( - model, - i, - optional_params=optional_params, - client=client, - logging_obj=logging_obj, - ) - for i in input - ] # [TODO]: make these parallel calls + if type(input) == str: + embeddings = [ + _embedding_func_single( + model, + input, + optional_params=optional_params, + client=client, + logging_obj=logging_obj, + ) + ] + else: + ## Embedding Call + embeddings = [ + _embedding_func_single( + model, + i, + optional_params=optional_params, + client=client, + logging_obj=logging_obj, + ) + for i in input + ] # [TODO]: make these parallel calls ## Populate OpenAI compliant dictionary embedding_response = [] From 276d11946ee760e8c2f30e408c12ef6fe1462dcb Mon Sep 17 00:00:00 2001 From: ishaan-jaff Date: Thu, 11 Jan 2024 23:04:41 +0530 Subject: [PATCH 17/19] (test) bedrock - embedding with strings --- litellm/tests/test_embedding.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py index 50dd6ee06..28cafe02c 100644 --- 
a/litellm/tests/test_embedding.py
+++ b/litellm/tests/test_embedding.py
@@ -186,10 +186,12 @@ def test_cohere_embedding3():
 
 def test_bedrock_embedding_titan():
     try:
-        litellm.set_verbose = True
+        # this tests if we support str input for bedrock embedding
+        litellm.set_verbose = False
+        # DO NOT MAKE THE INPUT A LIST in this test
         response = embedding(
             model="amazon.titan-embed-text-v1",
-            input="good morning from litellm, attempting to embed data",
+            input="good morning from litellm, attempting to embed data",  # input should always be a string in this test
         )
         print(f"response:", response)
         assert isinstance(
@@ -199,6 +201,8 @@ def test_bedrock_embedding_titan():
         assert all(
             isinstance(x, float) for x in response["data"][0]["embedding"]
         ), "Expected response to be a list of floats"
+        print("Response Usage", response.usage)
+        assert response.usage.prompt_tokens == 11
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 
@@ -277,7 +281,7 @@ def test_aembedding():
         pytest.fail(f"Error occurred: {e}")
 
 
-test_aembedding()
+# test_aembedding()
 
 
 def test_aembedding_azure():

From b7567865deb00ae68a3e951b258a53eabb513870 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Thu, 11 Jan 2024 23:12:57 +0530
Subject: [PATCH 18/19] (test) caching for bedrock/embedding str inputs

---
 litellm/tests/test_embedding.py | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/litellm/tests/test_embedding.py b/litellm/tests/test_embedding.py
index 28cafe02c..6505d432d 100644
--- a/litellm/tests/test_embedding.py
+++ b/litellm/tests/test_embedding.py
@@ -187,11 +187,15 @@ def test_cohere_embedding3():
 def test_bedrock_embedding_titan():
     try:
         # this tests if we support str input for bedrock embedding
-        litellm.set_verbose = False
+        litellm.set_verbose = True
+        litellm.enable_cache()
+        import time
+
+        current_time = str(time.time())
         # DO NOT MAKE THE INPUT A LIST in this test
         response = embedding(
-            model="amazon.titan-embed-text-v1",
-            input="good morning from litellm, attempting to embed data",  # input should always be a string in this test
+            model="bedrock/amazon.titan-embed-text-v1",
+            input=f"good morning from litellm, attempting to embed data {current_time}",  # input should always be a string in this test
         )
         print(f"response:", response)
         assert isinstance(
@@ -201,8 +205,23 @@ def test_bedrock_embedding_titan():
         assert all(
             isinstance(x, float) for x in response["data"][0]["embedding"]
         ), "Expected response to be a list of floats"
-        print("Response Usage", response.usage)
-        assert response.usage.prompt_tokens == 11
+
+        # this also tests if we can return a cache response for this scenario
+        import time
+
+        start_time = time.time()
+
+        response = embedding(
+            model="bedrock/amazon.titan-embed-text-v1",
+            input=f"good morning from litellm, attempting to embed data {current_time}",  # input should always be a string in this test
+        )
+        print(response)
+
+        end_time = time.time()
+        print(f"Embedding 2 response time: {end_time - start_time} seconds")
+
+        assert end_time - start_time < 0.1
+        litellm.disable_cache()
     except Exception as e:
         pytest.fail(f"Error occurred: {e}")
 

From 0a762695419e2187f9f82318c9e3a5194f65ce42 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Thu, 11 Jan 2024 23:25:02 +0530
Subject: [PATCH 19/19] =?UTF-8?q?bump:=20version=201.17.1=20=E2=86=92=201.?=
 =?UTF-8?q?17.2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml
b/pyproject.toml
index 08b66e27e..f6a305ca2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "litellm"
-version = "1.17.1"
+version = "1.17.2"
 description = "Library to easily interface with LLM API providers"
 authors = ["BerriAI"]
 license = "MIT License"
@@ -61,7 +61,7 @@ requires = ["poetry-core", "wheel"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.commitizen]
-version = "1.17.1"
+version = "1.17.2"
 version_files = [
     "pyproject.toml:^version"
 ]
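The two test patches above exercise the new behavior end to end: `litellm.embedding` now accepts a plain string for `bedrock/amazon.titan-embed-text-v1`, and a second call with the same input is answered from the cache. A minimal usage sketch in that spirit (it assumes valid AWS Bedrock credentials, uses the default in-memory cache that `litellm.enable_cache()` sets up, and the prompt text is only a placeholder):

```python
import time

import litellm
from litellm import embedding

# Turn on litellm's default in-memory cache (the tests above enable/disable it the same way).
litellm.enable_cache()

text = f"good morning from litellm, attempting to embed data {time.time()}"

# First call goes to Bedrock; a plain string input (not a list) is now supported.
first = embedding(model="bedrock/amazon.titan-embed-text-v1", input=text)
print("embedding length:", len(first["data"][0]["embedding"]))

# Second identical call should be answered from the cache, so it returns almost instantly.
start = time.time()
second = embedding(model="bedrock/amazon.titan-embed-text-v1", input=text)
print(f"cached call took {time.time() - start:.4f} seconds")

litellm.disable_cache()
```

Timing the second call, as the test does, is a quick way to confirm the cache is actually being hit.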