organize docs

commit 6faeac4cf6 (parent 4052b0d4c4)

3 changed files with 72 additions and 67 deletions
**`docs/my-website/docs/proxy/custom_pricing.md`**

````diff
@@ -1,6 +1,6 @@
 import Image from '@theme/IdealImage';
 
-# Custom Pricing - Sagemaker, etc.
+# Custom LLM Pricing - Sagemaker, Azure, etc
 
 Use this to register custom pricing for models.
 
@@ -16,39 +16,9 @@ LiteLLM already has pricing for any model in our [model cost map](https://github
 
 :::
 
-## Quick Start
+## Cost Per Second (e.g. Sagemaker)
 
-Register custom pricing for sagemaker completion model.
+### Usage with LiteLLM Proxy Server
 
-For cost per second pricing, you **just** need to register `input_cost_per_second`.
-
-```python
-# !pip install boto3
-from litellm import completion, completion_cost
-
-os.environ["AWS_ACCESS_KEY_ID"] = ""
-os.environ["AWS_SECRET_ACCESS_KEY"] = ""
-os.environ["AWS_REGION_NAME"] = ""
-
-
-def test_completion_sagemaker():
-    try:
-        print("testing sagemaker")
-        response = completion(
-            model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4",
-            messages=[{"role": "user", "content": "Hey, how's it going?"}],
-            input_cost_per_second=0.000420,
-        )
-        # Add any assertions here to check the response
-        print(response)
-        cost = completion_cost(completion_response=response)
-        print(cost)
-    except Exception as e:
-        raise Exception(f"Error occurred: {e}")
-
-```
-
-### Usage with OpenAI Proxy Server
-
 **Step 1: Add pricing to config.yaml**
 ```yaml
@@ -75,38 +45,7 @@ litellm /path/to/config.yaml
 
 ## Cost Per Token (e.g. Azure)
 
+### Usage with LiteLLM Proxy Server
 
-```python
-# !pip install boto3
-from litellm import completion, completion_cost
-
-## set ENV variables
-os.environ["AZURE_API_KEY"] = ""
-os.environ["AZURE_API_BASE"] = ""
-os.environ["AZURE_API_VERSION"] = ""
-
-
-def test_completion_azure_model():
-    try:
-        print("testing azure custom pricing")
-        # azure call
-        response = completion(
-            model = "azure/<your_deployment_name>",
-            messages = [{ "content": "Hello, how are you?","role": "user"}]
-            input_cost_per_token=0.005,
-            output_cost_per_token=1,
-        )
-        # Add any assertions here to check the response
-        print(response)
-        cost = completion_cost(completion_response=response)
-        print(cost)
-    except Exception as e:
-        raise Exception(f"Error occurred: {e}")
-
-test_completion_azure_model()
-```
-
-### Usage with OpenAI Proxy Server
-
 ```yaml
 model_list:
````
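The proxy hunks above truncate right at the start of each `config.yaml` example, so the registered pricing itself is not visible in this excerpt. For orientation only, here is a minimal sketch of what such entries typically look like, assuming the proxy's standard `model_list` / `litellm_params` layout; the `model_name` values and the `os.environ/...` references are illustrative placeholders, not taken from this commit, and the prices simply reuse the numbers from the SDK examples.

```yaml
model_list:
  # Hypothetical entry: cost-per-second pricing for a SageMaker deployment
  - model_name: sagemaker-completion-model
    litellm_params:
      model: sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4
      input_cost_per_second: 0.000420   # same illustrative rate as the SDK example

  # Hypothetical entry: cost-per-token pricing for an Azure deployment
  - model_name: azure-model
    litellm_params:
      model: azure/<your_deployment_name>
      api_key: os.environ/AZURE_API_KEY
      api_base: os.environ/AZURE_API_BASE
      api_version: os.environ/AZURE_API_VERSION
      input_cost_per_token: 0.005       # illustrative values from the SDK example
      output_cost_per_token: 1
```

The proxy is then started with `litellm /path/to/config.yaml` (the command visible in the hunk context above), so that cost tracking uses the registered prices instead of the defaults from the model cost map.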
**`docs/my-website/docs/sdk_custom_pricing.md`** (new file, 65 lines)
````markdown
# Custom Pricing - SageMaker, Azure, etc

Register custom pricing for a SageMaker completion model.

For cost per second pricing, you **just** need to register `input_cost_per_second`.

```python
# !pip install boto3
import os

from litellm import completion, completion_cost

os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""


def test_completion_sagemaker():
    try:
        print("testing sagemaker")
        response = completion(
            model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4",
            messages=[{"role": "user", "content": "Hey, how's it going?"}],
            input_cost_per_second=0.000420,  # custom price per second of inference
        )
        # Add any assertions here to check the response
        print(response)
        cost = completion_cost(completion_response=response)
        print(cost)
    except Exception as e:
        raise Exception(f"Error occurred: {e}")
```

## Cost Per Token (e.g. Azure)

For cost per token pricing, register `input_cost_per_token` and `output_cost_per_token`.

```python
import os

from litellm import completion, completion_cost

## set ENV variables
os.environ["AZURE_API_KEY"] = ""
os.environ["AZURE_API_BASE"] = ""
os.environ["AZURE_API_VERSION"] = ""


def test_completion_azure_model():
    try:
        print("testing azure custom pricing")
        # azure call
        response = completion(
            model="azure/<your_deployment_name>",
            messages=[{"content": "Hello, how are you?", "role": "user"}],
            input_cost_per_token=0.005,
            output_cost_per_token=1,
        )
        # Add any assertions here to check the response
        print(response)
        cost = completion_cost(completion_response=response)
        print(cost)
    except Exception as e:
        raise Exception(f"Error occurred: {e}")


test_completion_azure_model()
```
````
**`docs/my-website/sidebars.js`**

````diff
@@ -42,6 +42,7 @@ const sidebars = {
 "proxy/configs",
 "proxy/reliability",
 "proxy/cost_tracking",
+"proxy/custom_pricing",
 "proxy/self_serve",
 "proxy/virtual_keys",
 {
@@ -183,7 +184,6 @@ const sidebars = {
 "anthropic_completion"
 ],
 },
-"proxy/custom_pricing",
 {
 type: "category",
 label: "Secret Manager",
@@ -199,6 +199,8 @@ const sidebars = {
 "routing",
 "scheduler",
 "set_keys",
+"completion/token_usage",
+"sdk_custom_pricing",
 "budget_manager",
 "caching/all_caches",
 {
@@ -208,7 +210,6 @@ const sidebars = {
 },
 ],
 },
-"completion/token_usage",
 "load_test",
 {
 type: "category",
````