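# Smoke test for a LiteLLM proxy's OpenAI-compatible endpoint.
# Assumes a proxy is already serving ollama/llama2 on http://0.0.0.0:8000,
# e.g. started with `litellm --model ollama/llama2` (startup command may vary).
# Written against the pre-1.0 OpenAI SDK (openai.ChatCompletion); openai>=1.0
# replaced this module-level interface with a client object.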
import openai

# point the pre-1.0 OpenAI SDK at the local LiteLLM proxy
api_base = "http://0.0.0.0:8000"

openai.api_base = api_base
openai.api_key = "temp-key"  # placeholder key (the SDK requires one to be set)

print(openai.api_base)

print("LiteLLM: response from proxy with streaming")
response = openai.ChatCompletion.create(
    model="ollama/llama2",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, acknowledge that you got it",
        }
    ],
    stream=True,
)

for chunk in response:
    print(f"LiteLLM: streaming response from proxy {chunk}")

# the same request without streaming returns a single completion object
response = openai.ChatCompletion.create(
    model="ollama/llama2",
    messages=[
        {
            "role": "user",
            "content": "this is a test request, acknowledge that you got it",
        }
    ],
)

print(f"LiteLLM: response from proxy {response}")