formatting improvements

ishaan-jaff 2023-08-28 09:20:50 -07:00
parent 3e0a16acf4
commit a69b7ffcfa
17 changed files with 464 additions and 323 deletions

litellm/llms/baseten.py

@@ -5,6 +5,7 @@ import time
 from typing import Callable
 from litellm.utils import ModelResponse
 
+
 class BasetenError(Exception):
     def __init__(self, status_code, message):
         self.status_code = status_code
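For reference, BasetenError simply carries the HTTP status code and message of a failed Baseten call. A minimal sketch of how such an exception is defined and used; the super().__init__ call and the raise site are illustrative assumptions, not part of this diff:

class BasetenError(Exception):
    def __init__(self, status_code, message):
        self.status_code = status_code
        self.message = message
        # assumption: forwarding to Exception keeps str(exc) meaningful
        super().__init__(self.message)

# illustrative usage: callers can branch on the status code
try:
    raise BasetenError(status_code=401, message="invalid API key")
except BasetenError as e:
    print(e.status_code, e.message)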
@@ -15,9 +16,7 @@ class BasetenError(Exception):
 class BasetenLLM:
-    def __init__(
-        self, encoding, logging_obj, api_key=None
-    ):
+    def __init__(self, encoding, logging_obj, api_key=None):
         self.encoding = encoding
         self.completion_url_fragment_1 = "https://app.baseten.co/models/"
         self.completion_url_fragment_2 = "/predict"
@@ -55,13 +54,9 @@ class BasetenLLM:
         for message in messages:
             if "role" in message:
                 if message["role"] == "user":
-                    prompt += (
-                        f"{message['content']}"
-                    )
+                    prompt += f"{message['content']}"
                 else:
-                    prompt += (
-                        f"{message['content']}"
-                    )
+                    prompt += f"{message['content']}"
             else:
                 prompt += f"{message['content']}"
         data = {
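With the redundant parentheses gone, it is easier to see that both role branches append exactly the same thing, so the loop reduces to concatenating each message's content in order. A standalone sketch of that behavior:

messages = [
    {"role": "system", "content": "You are a helpful assistant. "},
    {"role": "user", "content": "Hello, Baseten!"},
]

prompt = ""
for message in messages:
    # the role check only selects between two identical branches
    prompt += f"{message['content']}"

print(prompt)  # -> You are a helpful assistant. Hello, Baseten!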
@@ -78,7 +73,9 @@ class BasetenLLM:
         )
         ## COMPLETION CALL
         response = requests.post(
-            self.completion_url_fragment_1 + model + self.completion_url_fragment_2, headers=self.headers, data=json.dumps(data)
+            self.completion_url_fragment_1 + model + self.completion_url_fragment_2,
+            headers=self.headers,
+            data=json.dumps(data),
         )
         if "stream" in optional_params and optional_params["stream"] == True:
             return response.iter_lines()
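The reflowed call is an ordinary requests.post with a JSON-encoded body; when the caller requested streaming, the raw response.iter_lines() iterator is returned instead of a parsed object. A hedged sketch of that control flow, where the function name, header shape, and payload are assumptions rather than litellm's API:

import json
import requests

def call_baseten(model, data, headers, optional_params):
    # endpoint assembled from the fragments shown earlier in the file
    url = "https://app.baseten.co/models/" + model + "/predict"
    response = requests.post(url, headers=headers, data=json.dumps(data))
    if optional_params.get("stream") is True:
        # hand back the raw line iterator for the caller to consume
        return response.iter_lines()
    return response.json()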
@@ -100,19 +97,33 @@ class BasetenLLM:
                 )
             else:
                 if "model_output" in completion_response:
-                    if isinstance(completion_response["model_output"], dict) and "data" in completion_response["model_output"] and isinstance(completion_response["model_output"]["data"], list):
-                        model_response["choices"][0]["message"]["content"] = completion_response["model_output"]["data"][0]
+                    if (
+                        isinstance(completion_response["model_output"], dict)
+                        and "data" in completion_response["model_output"]
+                        and isinstance(
+                            completion_response["model_output"]["data"], list
+                        )
+                    ):
+                        model_response["choices"][0]["message"][
+                            "content"
+                        ] = completion_response["model_output"]["data"][0]
                     elif isinstance(completion_response["model_output"], str):
-                        model_response["choices"][0]["message"]["content"] = completion_response["model_output"]
-                    elif "completion" in completion_response and isinstance(completion_response["completion"], str):
-                        model_response["choices"][0]["message"]["content"] = completion_response["completion"]
+                        model_response["choices"][0]["message"][
+                            "content"
+                        ] = completion_response["model_output"]
+                    elif "completion" in completion_response and isinstance(
+                        completion_response["completion"], str
+                    ):
+                        model_response["choices"][0]["message"][
+                            "content"
+                        ] = completion_response["completion"]
                     else:
-                        raise ValueError(f"Unable to parse response. Original response: {response.text}")
+                        raise ValueError(
+                            f"Unable to parse response. Original response: {response.text}"
+                        )
-            ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
-            prompt_tokens = len(
-                self.encoding.encode(prompt)
-            )
+            ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
+            prompt_tokens = len(self.encoding.encode(prompt))
             completion_tokens = len(
                 self.encoding.encode(model_response["choices"][0]["message"]["content"])
             )
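The parsing ladder above accepts each response shape handled here: model_output as a dict with a data list, model_output as a plain string, or a top-level completion string; anything else raises ValueError with the raw body. The token counts are computed locally with the tokenizer only as an approximation, since, per the comment, Baseten bills on compute time rather than tokens. A condensed sketch of the same ladder, where extract_content is a hypothetical helper and completion_response stands in for the parsed JSON body:

def extract_content(completion_response, raw_text=""):
    model_output = completion_response.get("model_output")
    if isinstance(model_output, dict) and isinstance(model_output.get("data"), list):
        # e.g. {"model_output": {"data": ["Hi there!"]}}
        return model_output["data"][0]
    elif isinstance(model_output, str):
        return model_output
    elif isinstance(completion_response.get("completion"), str):
        return completion_response["completion"]
    raise ValueError(f"Unable to parse response. Original response: {raw_text}")

# both shapes yield "Hi there!"
assert extract_content({"model_output": {"data": ["Hi there!"]}}) == "Hi there!"
assert extract_content({"completion": "Hi there!"}) == "Hi there!"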