mirror of
https://github.com/BerriAI/litellm.git
synced 2025-04-26 03:04:13 +00:00
docs update
This commit is contained in:
parent
1ba6b6761b
commit
bd77d5ac21
3 changed files with 17 additions and 13 deletions
|
@ -1,4 +1,4 @@
|
||||||
# Token Usage
|
# Completion Token Usage & Cost
|
||||||
By default LiteLLM returns token usage in all completion requests ([See here](https://litellm.readthedocs.io/en/latest/output/))
|
By default LiteLLM returns token usage in all completion requests ([See here](https://litellm.readthedocs.io/en/latest/output/))
|
||||||
|
|
||||||
However, we also expose 3 public helper functions to calculate token usage across providers:
|
However, we also expose 3 public helper functions to calculate token usage across providers:
|
||||||
|
@ -33,13 +33,19 @@ print(prompt_tokens_cost_usd_dollar, completion_tokens_cost_usd_dollar)
|
||||||
```
|
```
|
||||||
|
|
||||||
3. `completion_cost`
|
3. `completion_cost`
|
||||||
|
Accepts a `litellm.completion()` response and returns the cost of the `completion` call as a `float`
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from litellm import completion_cost
|
from litellm import completion, completion_cost
|
||||||
|
|
||||||
prompt = "Hey, how's it going"
|
response = completion(
|
||||||
completion = "Hi, I'm gpt - I am doing well"
|
model="together_ai/togethercomputer/llama-2-70b-chat",
|
||||||
cost_of_query = completion_cost(model="gpt-3.5-turbo", prompt=prompt, completion=completion))
|
messages=messages,
|
||||||
|
request_timeout=200,
|
||||||
print(cost_of_query)
|
)
|
||||||
|
# pass your response from completion to completion_cost
|
||||||
|
cost = completion_cost(completion_response=response)
|
||||||
|
formatted_string = f"${float(cost):.10f}"
|
||||||
|
print(formatted_string)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
|
@ -28,11 +28,11 @@ def test_completion_togetherai_cost():
|
||||||
)
|
)
|
||||||
# Add any assertions here to check the response
|
# Add any assertions here to check the response
|
||||||
print(response)
|
print(response)
|
||||||
print("Completion Cost:")
|
print("Completion Cost: for togethercomputer/llama-2-70b-chat")
|
||||||
cost = completion_cost(completion_response=response)
|
cost = completion_cost(completion_response=response)
|
||||||
formatted_string = f"${float(cost):.10f}"
|
formatted_string = f"${float(cost):.10f}"
|
||||||
print(formatted_string)
|
print(formatted_string)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pytest.fail(f"Error occurred: {e}")
|
pytest.fail(f"Error occurred: {e}")
|
||||||
# test_completion_togetherai_cost()
|
test_completion_togetherai_cost()
|
|
@ -578,9 +578,7 @@ def get_model_params_and_category(model_name):
|
||||||
if params_match != None:
|
if params_match != None:
|
||||||
params_match = params_match.group(1)
|
params_match = params_match.group(1)
|
||||||
params_match = params_match.replace("b", "")
|
params_match = params_match.replace("b", "")
|
||||||
print(params_match)
|
|
||||||
params_billion = float(params_match)
|
params_billion = float(params_match)
|
||||||
|
|
||||||
# Determine the category based on the number of parameters
|
# Determine the category based on the number of parameters
|
||||||
if params_billion <= 3.0:
|
if params_billion <= 3.0:
|
||||||
category = "together-ai-up-to-3b"
|
category = "together-ai-up-to-3b"
|
||||||
|
@ -593,8 +591,8 @@ def get_model_params_and_category(model_name):
|
||||||
elif params_billion <= 70.0:
|
elif params_billion <= 70.0:
|
||||||
category = "together-ai-40.1b-70b"
|
category = "together-ai-40.1b-70b"
|
||||||
return category
|
return category
|
||||||
|
|
||||||
return "Model name not recognized or category not found."
|
return None
|
||||||
|
|
||||||
|
|
||||||
def token_counter(model, text):
|
def token_counter(model, text):
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue