forked from phoenix/litellm-mirror

hosted vllm usage

parent c05606b587
commit c45e2ed48c
2 changed files with 27 additions and 3 deletions
@@ -346,9 +346,9 @@ def completion(
         ## RESPONSE OBJECT
         completion_response = response["choices"][0]["text"]
         model_response["choices"][0]["message"]["content"] = completion_response
-        model_response["created"] = response["created"]
+        model_response["created"] = response.get("created", time.time())
         model_response["model"] = model
-        model_response["usage"] = response["usage"]
+        model_response["usage"] = response.get("usage", 0)
         response = model_response
     elif (
         "replicate" in model or

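The two -/+ pairs above swap direct key access for dict.get with fallbacks: an OpenAI-format server fronting vLLM may omit optional fields such as "created" or "usage", and direct indexing on a missing key raises KeyError. A minimal standalone sketch of the pattern (the function name and the response scaffolding are illustrative, not litellm's actual internals):

import time

def normalize_text_completion(response: dict, model: str) -> dict:
    # Map a raw OpenAI-format text completion onto a chat-style response dict.
    model_response = {"choices": [{"message": {}}]}
    model_response["choices"][0]["message"]["content"] = response["choices"][0]["text"]
    # Hosted servers may omit optional metadata; fall back instead of raising.
    model_response["created"] = response.get("created", time.time())
    model_response["model"] = model
    model_response["usage"] = response.get("usage", 0)
    return model_response
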
@@ -14,7 +14,6 @@ from litellm import embedding, completion, text_completion, completion_cost
 
 litellm.vertex_project = "pathrise-convert-1606954137718"
 litellm.vertex_location = "us-central1"
-litellm.use_client = True
 
 user_message = "Write a short poem about the sky"
 messages = [{"content": user_message, "role": "user"}]

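Dropping litellm.use_client = True means the module setup no longer opts every test into litellm's client-side logging integration. A hedged sketch of scoping debug output to a single call instead, using litellm.set_verbose (the same flag the commented-out test below references); the helper name is illustrative:

import litellm
from litellm import completion

def debug_single_call(messages):
    litellm.set_verbose = True  # log request/response details for this call only
    try:
        return completion(model="gpt-3.5-turbo", messages=messages)
    finally:
        litellm.set_verbose = False  # keep the rest of the suite quiet
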
@@ -482,6 +481,31 @@ def test_completion_sagemaker():
 
 # test_completion_vllm()
 
+# def test_completion_hosted_vllm():
+#     # this tests calling a server where vllm is hosted
+#     # this should make an openai.Completion() call to the specified api_base
+#     # send a request to this proxy server: https://replit.com/@BerriAI/openai-proxy#main.py
+#     # it checks if model == facebook/opt-125m and returns test passed
+#     try:
+#         # litellm.set_verbose = True
+#         response = completion(
+#             model="facebook/opt-125m",
+#             messages=messages,
+#             temperature=0.2,
+#             max_tokens=80,
+#             api_base="https://openai-proxy.berriai.repl.co/v1",
+#             custom_llm_provider="openai"
+#         )
+#         print(response)
+
+#         if response['choices'][0]['message']['content'] != "passed":
+#             # see https://replit.com/@BerriAI/openai-proxy#main.py
+#             pytest.fail(f"Error occurred: proxy server did not respond")
+#     except Exception as e:
+#         pytest.fail(f"Error occurred: {e}")
+
+# test_completion_hosted_vllm()
+
 # def test_completion_custom_api_base():
 #     try:
 #         response = completion(

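Taken together, the commented-out test documents the intended hosted-vLLM call pattern: point completion() at any OpenAI-compatible endpoint that serves a vLLM model. A sketch of that usage with the api_base taken from the test; swap in your own server's /v1 endpoint in practice:

from litellm import completion

# facebook/opt-125m is served behind an OpenAI-compatible proxy; the
# custom_llm_provider flag routes the call through the OpenAI client
# against the given api_base.
response = completion(
    model="facebook/opt-125m",
    messages=[{"content": "Write a short poem about the sky", "role": "user"}],
    temperature=0.2,
    max_tokens=80,
    api_base="https://openai-proxy.berriai.repl.co/v1",
    custom_llm_provider="openai",
)
print(response["choices"][0]["message"]["content"])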