Mirror of https://github.com/BerriAI/litellm.git (synced 2025-04-24 18:24:20 +00:00)
fix(ollama.py): use tiktoken as backup for prompt token counting
This commit is contained in:
parent 22e0a6c7df
commit 22a900463e

4 changed files with 12 additions and 12 deletions
@@ -217,7 +217,7 @@ def get_ollama_response(
         model_response["choices"][0]["message"]["content"] = response_json["response"]
         model_response["created"] = int(time.time())
         model_response["model"] = "ollama/" + model
-        prompt_tokens = response_json["prompt_eval_count"]  # type: ignore
+        prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(prompt)))  # type: ignore
         completion_tokens = response_json["eval_count"]
         model_response["usage"] = litellm.Usage(
             prompt_tokens=prompt_tokens,

@@ -318,7 +318,7 @@ async def ollama_acompletion(url, data, model_response, encoding, logging_obj):
             ]
             model_response["created"] = int(time.time())
             model_response["model"] = "ollama/" + data["model"]
-            prompt_tokens = response_json["prompt_eval_count"]  # type: ignore
+            prompt_tokens = response_json.get("prompt_eval_count", len(encoding.encode(data["prompt"])))  # type: ignore
             completion_tokens = response_json["eval_count"]
             model_response["usage"] = litellm.Usage(
                 prompt_tokens=prompt_tokens,

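The two hunks above switch both the sync and async Ollama handlers from indexing "prompt_eval_count" directly to a dict.get() with a local token-count fallback, so a missing field no longer raises a KeyError. A minimal sketch of the same pattern, assuming tiktoken is installed; the helper name and the cl100k_base encoding choice are illustrative, not litellm's actual code:

import tiktoken

def count_prompt_tokens(response_json: dict, prompt: str) -> int:
    # Prefer the token count reported by the Ollama server; fall back to
    # counting the prompt locally with tiktoken when the field is missing.
    encoding = tiktoken.get_encoding("cl100k_base")  # assumed encoding, for illustration only
    return response_json.get("prompt_eval_count", len(encoding.encode(prompt)))

# A response without "prompt_eval_count" falls back to the local count:
print(count_prompt_tokens({"eval_count": 12}, "Hello, how are you?"))
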
@@ -1,5 +1,5 @@
 model_list:
-  - model_name: text-davinci-003
+  - model_name: gpt-3.5-turbo-instruct
     litellm_params:
       model: ollama/zephyr
   - model_name: gpt-4

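This config hunk only renames the alias, so requests for gpt-3.5-turbo-instruct are still served by the ollama/zephyr deployment named in litellm_params. A rough sketch of calling that underlying model directly, assuming a local Ollama server on its default port (the api_base value is an assumption and may differ in a real deployment):

import litellm

# Direct call to the model the alias points at; api_base assumed to be Ollama's local default.
response = litellm.completion(
    model="ollama/zephyr",
    messages=[{"role": "user", "content": "Hello, how are you?"}],
    api_base="http://localhost:11434",
)
print(response["choices"][0]["message"]["content"])
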
@@ -602,7 +602,7 @@ def openai_text_completion_test():
     try:
         # OVERRIDE WITH DYNAMIC MAX TOKENS
         response_1 = litellm.completion(
-            model="text-davinci-003",
+            model="gpt-3.5-turbo-instruct",
             messages=[
                 {
                     "content": "Hello, how are you? Be as verbose as possible",

@@ -616,7 +616,7 @@ def openai_text_completion_test():

         # USE CONFIG TOKENS
         response_2 = litellm.completion(
-            model="text-davinci-003",
+            model="gpt-3.5-turbo-instruct",
             messages=[
                 {
                     "content": "Hello, how are you? Be as verbose as possible",

@@ -630,7 +630,7 @@ def openai_text_completion_test():
         assert len(response_2_text) < len(response_1_text)

         response_3 = litellm.completion(
-            model="text-davinci-003",
+            model="gpt-3.5-turbo-instruct",
             messages=[{"content": "Hello, how are you?", "role": "user"}],
             n=2,
         )

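The three hunks above only swap the deprecated text-davinci-003 name for gpt-3.5-turbo-instruct in openai_text_completion_test, which compares a per-call max_tokens override against config-driven token limits. A minimal sketch of the overridden call, assuming OPENAI_API_KEY is set in the environment; the limit of 100 tokens is illustrative:

import litellm

# Per-call max_tokens override; the value 100 is illustrative, not from the test config.
response_1 = litellm.completion(
    model="gpt-3.5-turbo-instruct",
    messages=[{"role": "user", "content": "Hello, how are you? Be as verbose as possible"}],
    max_tokens=100,
)
print(response_1["choices"][0]["message"]["content"])
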
@@ -2682,7 +2682,7 @@ def test_completion_openai_prompt():
     try:
         print("\n text 003 test\n")
         response = text_completion(
-            model="text-davinci-003", prompt="What's the weather in SF?"
+            model="gpt-3.5-turbo-instruct", prompt="What's the weather in SF?"
         )
         print(response)
         response_str = response["choices"][0]["text"]

@@ -2700,7 +2700,7 @@ def test_completion_openai_engine_and_model():
         print("\n text 003 test\n")
         litellm.set_verbose = True
         response = text_completion(
-            model="text-davinci-003",
+            model="gpt-3.5-turbo-instruct",
             engine="anything",
             prompt="What's the weather in SF?",
             max_tokens=5,

@@ -2721,7 +2721,7 @@ def test_completion_openai_engine():
         print("\n text 003 test\n")
         litellm.set_verbose = True
         response = text_completion(
-            engine="text-davinci-003", prompt="What's the weather in SF?", max_tokens=5
+            engine="gpt-3.5-turbo-instruct", prompt="What's the weather in SF?", max_tokens=5
         )
         print(response)
         response_str = response["choices"][0]["text"]

@@ -2757,7 +2757,7 @@ def test_text_completion_basic():
         print("\n test 003 with echo and logprobs \n")
         litellm.set_verbose = False
         response = text_completion(
-            model="text-davinci-003",
+            model="gpt-3.5-turbo-instruct",
             prompt="good morning",
             max_tokens=10,
             logprobs=10,

@@ -2779,7 +2779,7 @@ def test_completion_text_003_prompt_array():
     try:
         litellm.set_verbose = False
         response = text_completion(
-            model="text-davinci-003",
+            model="gpt-3.5-turbo-instruct",
             prompt=token_prompt,  # token prompt is a 2d list
         )
         print("\n\n response")

@@ -2857,7 +2857,7 @@ def test_text_completion_stream():
 # async def test_text_completion_async_stream():
 #     try:
 #         response = await atext_completion(
-#             model="text-completion-openai/text-davinci-003",
+#             model="text-completion-openai/gpt-3.5-turbo-instruct",
 #             prompt="good morning",
 #             stream=True,
 #             max_tokens=10,

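The remaining test hunks all make the same model-name swap in text_completion calls. A minimal sketch of the updated call shape, assuming OPENAI_API_KEY is set in the environment; prompt and max_tokens values mirror the tests above:

from litellm import text_completion

# Text-completion call against the instruct model used throughout the updated tests.
response = text_completion(
    model="gpt-3.5-turbo-instruct",
    prompt="What's the weather in SF?",
    max_tokens=5,
)
print(response["choices"][0]["text"])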