diff --git a/litellm/proxy/proxy_cli.py b/litellm/proxy/proxy_cli.py
index 9ed794dac..cd1b4e7f4 100644
--- a/litellm/proxy/proxy_cli.py
+++ b/litellm/proxy/proxy_cli.py
@@ -116,7 +116,7 @@ def run_server(host, port, api_base, api_version, model, alias, add_key, headers
         print(f"ollama called")
         run_ollama_serve()
     if test != False:
-        click.echo('LiteLLM: Making a test ChatCompletions request to your proxy')
+        click.echo('\nLiteLLM: Making a test ChatCompletions request to your proxy')
         import openai
         if test == True:   # flag value set
             api_base = f"http://{host}:{port}"
@@ -133,20 +133,21 @@ def run_server(host, port, api_base, api_version, model, alias, add_key, headers
                 "content": "this is a test request, write a short poem"
             }
         ])
-        click.echo(f'LiteLLM: response from proxy {response}')
+        click.echo(f'\nLiteLLM: response from proxy {response}')
 
-        click.echo(f'LiteLLM: response from proxy with streaming {response}')
-        # response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
-        #     {
-        #         "role": "user",
-        #         "content": "this is a test request, write a short poem"
-        #     }
-        # ],
-        # stream=True,
-        # )
-        # for chunk in response:
-        #     click.echo(f'LiteLLM: streaming response from proxy {chunk}')
+        print("\n Making streaming request to proxy")
+        response = client.chat.completions.create(model="gpt-3.5-turbo", messages = [
+            {
+                "role": "user",
+                "content": "this is a test request, write a short poem"
+            }
+        ],
+        stream=True,
+        )
+        for chunk in response:
+            click.echo(f'LiteLLM: streaming response from proxy {chunk}')
+        print("\n making completion request to proxy")
         response = client.completions.create(model="gpt-3.5-turbo", prompt='this is a test request, write a short poem')
         print(response)
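
For context (not part of the diff): a minimal client-side sketch of what the proxy's test flag now exercises, i.e. a plain chat completion followed by a streaming one against the local proxy. The base URL and api_key value below are assumptions; point them at whatever host/port the proxy was started with.

    # Sketch only: mirrors the non-streaming and streaming test requests added in this diff.
    # Assumes the proxy is running locally; adjust base_url to your --host/--port.
    import openai

    client = openai.OpenAI(api_key="anything", base_url="http://0.0.0.0:8000")

    # Non-streaming chat completion (first test request in the diff)
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "this is a test request, write a short poem"}],
    )
    print(response)

    # Streaming chat completion (the newly enabled block): iterate over chunks as they arrive
    stream = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": "this is a test request, write a short poem"}],
        stream=True,
    )
    for chunk in stream:
        print(chunk)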