Mirror of https://github.com/BerriAI/litellm.git
formatting improvements
This commit is contained in:
parent 3e0a16acf4
commit a69b7ffcfa
17 changed files with 464 additions and 323 deletions
@@ -5,6 +5,7 @@ import time
 from typing import Callable
+from litellm.utils import ModelResponse


 class BasetenError(Exception):
     def __init__(self, status_code, message):
         self.status_code = status_code
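The hunks below write into model_response["choices"][0]["message"]["content"], i.e. they treat the newly imported ModelResponse as an OpenAI-style nested mapping. A minimal stand-in for following along; the dict here is an illustration, not litellm's actual class:

import time

# Illustrative stand-in for litellm.utils.ModelResponse (assumption:
# the real object supports this OpenAI-style nested indexing).
model_response = {
    "choices": [{"message": {"role": "assistant", "content": ""}}],
    "created": time.time(),
    "model": "my-baseten-model",  # hypothetical model id
    "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
}

# BasetenError pairs an HTTP status code with a message, e.g.:
# raise BasetenError(status_code=response.status_code, message=response.text)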
@@ -15,9 +16,7 @@ class BasetenError(Exception):


 class BasetenLLM:
-    def __init__(
-        self, encoding, logging_obj, api_key=None
-    ):
+    def __init__(self, encoding, logging_obj, api_key=None):
         self.encoding = encoding
         self.completion_url_fragment_1 = "https://app.baseten.co/models/"
         self.completion_url_fragment_2 = "/predict"
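At request time the two fragments stored in __init__ are joined around the model id. A quick sketch of the resulting predict endpoint (the model id is hypothetical):

completion_url_fragment_1 = "https://app.baseten.co/models/"
completion_url_fragment_2 = "/predict"

model = "abc123"  # hypothetical Baseten model id
url = completion_url_fragment_1 + model + completion_url_fragment_2
print(url)  # -> https://app.baseten.co/models/abc123/predict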
@@ -55,13 +54,9 @@ class BasetenLLM:
         for message in messages:
             if "role" in message:
                 if message["role"] == "user":
-                    prompt += (
-                        f"{message['content']}"
-                    )
+                    prompt += f"{message['content']}"
                 else:
-                    prompt += (
-                        f"{message['content']}"
-                    )
+                    prompt += f"{message['content']}"
             else:
                 prompt += f"{message['content']}"
         data = {
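The prompt builder concatenates message contents regardless of role; both arms of the user/else split are currently identical, which is what lets the formatter collapse them to one-liners. A self-contained sketch of the loop as it stands after this change:

messages = [
    {"role": "user", "content": "Hello, "},
    {"role": "assistant", "content": "hi there. "},
    {"content": "no role on this one"},  # still appended by the final else
]

prompt = ""
for message in messages:
    if "role" in message:
        if message["role"] == "user":
            prompt += f"{message['content']}"
        else:
            prompt += f"{message['content']}"
    else:
        prompt += f"{message['content']}"

print(prompt)  # -> Hello, hi there. no role on this one

The role check is a no-op today, but it presumably leaves room for role-specific prompt templates later.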
@@ -78,7 +73,9 @@ class BasetenLLM:
         )
         ## COMPLETION CALL
         response = requests.post(
-            self.completion_url_fragment_1 + model + self.completion_url_fragment_2, headers=self.headers, data=json.dumps(data)
+            self.completion_url_fragment_1 + model + self.completion_url_fragment_2,
+            headers=self.headers,
+            data=json.dumps(data),
         )
         if "stream" in optional_params and optional_params["stream"] == True:
             return response.iter_lines()
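Outside the class, the same call looks like this; the payload shape and the Api-Key header format are assumptions here, since self.headers is built elsewhere in the file:

import json

import requests

url = "https://app.baseten.co/models/abc123/predict"  # hypothetical model id
headers = {"Authorization": "Api-Key YOUR_BASETEN_API_KEY"}  # assumed header format
data = {"prompt": "What is the capital of France?"}  # assumed payload shape

response = requests.post(url, headers=headers, data=json.dumps(data))

# With stream=True in optional_params the handler short-circuits and hands
# back the raw line iterator instead of a parsed response:
# return response.iter_lines()
print(response.status_code, response.text)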
@@ -100,19 +97,33 @@ class BasetenLLM:
             )
         else:
             if "model_output" in completion_response:
-                if isinstance(completion_response["model_output"], dict) and "data" in completion_response["model_output"] and isinstance(completion_response["model_output"]["data"], list):
-                    model_response["choices"][0]["message"]["content"] = completion_response["model_output"]["data"][0]
+                if (
+                    isinstance(completion_response["model_output"], dict)
+                    and "data" in completion_response["model_output"]
+                    and isinstance(
+                        completion_response["model_output"]["data"], list
+                    )
+                ):
+                    model_response["choices"][0]["message"][
+                        "content"
+                    ] = completion_response["model_output"]["data"][0]
                 elif isinstance(completion_response["model_output"], str):
-                    model_response["choices"][0]["message"]["content"] = completion_response["model_output"]
-            elif "completion" in completion_response and isinstance(completion_response["completion"], str):
-                model_response["choices"][0]["message"]["content"] = completion_response["completion"]
+                    model_response["choices"][0]["message"][
+                        "content"
+                    ] = completion_response["model_output"]
+            elif "completion" in completion_response and isinstance(
+                completion_response["completion"], str
+            ):
+                model_response["choices"][0]["message"][
+                    "content"
+                ] = completion_response["completion"]
             else:
-                raise ValueError(f"Unable to parse response. Original response: {response.text}")
+                raise ValueError(
+                    f"Unable to parse response. Original response: {response.text}"
+                )

-        ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
-        prompt_tokens = len(
-            self.encoding.encode(prompt)
-        )
+        ## CALCULATING USAGE - baseten charges on time, not tokens - have some mapping of cost here.
+        prompt_tokens = len(self.encoding.encode(prompt))
         completion_tokens = len(
             self.encoding.encode(model_response["choices"][0]["message"]["content"])
         )
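Condensed, the reformatted if/elif chain probes three response shapes in order and raises only when neither key yields usable content; the helper name below is hypothetical, and tiktoken stands in for whatever encoding object was passed to __init__ (any object with an encode method would do):

import tiktoken  # assumption: self.encoding is tiktoken-like


def parse_baseten_output(completion_response, response_text):
    # Hypothetical helper mirroring the if/elif chain above.
    if "model_output" in completion_response:
        out = completion_response["model_output"]
        if isinstance(out, dict) and isinstance(out.get("data"), list):
            return out["data"][0]
        if isinstance(out, str):
            return out
        return None  # key present but shape unrecognized: content stays unset
    if "completion" in completion_response and isinstance(
        completion_response["completion"], str
    ):
        return completion_response["completion"]
    raise ValueError(f"Unable to parse response. Original response: {response_text}")


content = parse_baseten_output({"model_output": "Paris."}, response_text="{}")

# Usage accounting, as in the tail of the hunk; Baseten bills on compute
# time, so these counts feed a cost mapping rather than per-token billing.
encoding = tiktoken.get_encoding("cl100k_base")  # hypothetical encoding choice
prompt_tokens = len(encoding.encode("What is the capital of France?"))
completion_tokens = len(encoding.encode(content))
print(prompt_tokens, completion_tokens)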