From e125414611347c5291c1c175c0bf370a56fea110 Mon Sep 17 00:00:00 2001
From: ishaan-jaff
Date: Mon, 13 Nov 2023 10:58:18 -0800
Subject: [PATCH] (fix) proxy cli compatible with openai v1.0.0

---
 litellm/proxy/proxy_cli.py | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index 9ed794dac..cd1b4e7f4 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -116,7 +116,7 @@ def run_server(host, port, api_base, api_version, model, alias, add_key, headers
         print(f"ollama called")
         run_ollama_serve()
     if test != False:
-        click.echo('LiteLLM: Making a test ChatCompletions request to your proxy')
+        click.echo('\nLiteLLM: Making a test ChatCompletions request to your proxy')
         import openai
         if test == True:    # flag value set
             api_base = f"http://{host}:{port}"
@@ -133,20 +133,21 @@ def run_server(host, port, api_base, api_version, model, alias, add_key, headers
                 "content": "this is a test request, write a short poem"
             }
         ])
-        click.echo(f'LiteLLM: response from proxy {response}')
+        click.echo(f'\nLiteLLM: response from proxy {response}')
 
-        click.echo(f'LiteLLM: response from proxy with streaming {response}')
-        # response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
-        #     {
-        #         "role": "user",
-        #         "content": "this is a test request, write a short poem"
-        #     }
-        # ],
-        # stream=True,
-        # )
-        # for chunk in response:
-        #     click.echo(f'LiteLLM: streaming response from proxy {chunk}')
+        print("\n Making streaming request to proxy")
+        response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
+            {
+                "role": "user",
+                "content": "this is a test request, write a short poem"
+            }
+        ],
+        stream=True,
+        )
+        for chunk in response:
+            click.echo(f'LiteLLM: streaming response from proxy {chunk}')
+        print("\n making completion request to proxy")
         response = client.completions.create(model="gpt-3.5-turbo", prompt='this is a test request, write a short poem')
         print(response)
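
For anyone exercising the change by hand, below is a minimal sketch of the openai v1.0.0-style calls the patched test path now makes. It is not part of the patch: it assumes a LiteLLM proxy is already running locally, and the base_url and placeholder api_key are illustrative, so point them at whatever host and port you started the proxy on.

import openai

# Construct an openai v1.0.0 client pointed at the local proxy; the key is a placeholder.
client = openai.OpenAI(api_key="anything", base_url="http://0.0.0.0:8000")

# Non-streaming chat completion, mirroring the first test request in the patch.
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "this is a test request, write a short poem"}],
)
print(response)

# Streaming variant: with stream=True the v1 client returns an iterator of chunks.
stream = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "this is a test request, write a short poem"}],
    stream=True,
)
for chunk in stream:
    print(chunk)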