import openai

# Point the openai SDK (pre-1.0 interface, which exposes openai.api_base /
# openai.ChatCompletion) at the local proxy.
api_base = "http://0.0.0.0:8000"
openai.api_base = api_base
openai.api_key = "temp-key"
print(openai.api_base)

# Streaming request through the proxy
print("LiteLLM: response from proxy with streaming")
response = openai.ChatCompletion.create(
    model="ollama/llama2",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, acknowledge that you got it"
        }
    ],
    stream=True
)
for chunk in response:
    print(f"LiteLLM: streaming response from proxy {chunk}")

# Non-streaming request through the proxy
response = openai.ChatCompletion.create(
    model="ollama/llama2",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, acknowledge that you got it"
        }
    ]
)
print(f"LiteLLM: response from proxy {response}")
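
# A minimal sketch of the same two calls using the openai>=1.0 client
# interface (openai.ChatCompletion was removed in that release). This assumes
# the OpenAI client class from openai>=1.0; the proxy URL and the "temp-key"
# placeholder are carried over from the snippet above.
from openai import OpenAI

client = OpenAI(base_url="http://0.0.0.0:8000", api_key="temp-key")

# Streaming request through the proxy
stream = client.chat.completions.create(
    model="ollama/llama2",
    messages=[
        {"role": "user", "content": "this is a test request, acknowledge that you got it"}
    ],
    stream=True,
)
for chunk in stream:
    print(f"LiteLLM: streaming response from proxy {chunk}")

# Non-streaming request through the proxy
response = client.chat.completions.create(
    model="ollama/llama2",
    messages=[
        {"role": "user", "content": "this is a test request, acknowledge that you got it"}
    ],
)
print(f"LiteLLM: response from proxy {response}")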