organize docs

This commit is contained in:
Ishaan Jaff 2024-08-03 12:47:22 -07:00
parent 4052b0d4c4
commit 6faeac4cf6
3 changed files with 72 additions and 67 deletions

View file

@ -1,6 +1,6 @@
import Image from '@theme/IdealImage'; import Image from '@theme/IdealImage';
# Custom Pricing - Sagemaker, etc. # Custom LLM Pricing - Sagemaker, Azure, etc
Use this to register custom pricing for models. Use this to register custom pricing for models.
@ -16,39 +16,9 @@ LiteLLM already has pricing for any model in our [model cost map](https://github
::: :::
## Quick Start ## Cost Per Second (e.g. Sagemaker)
Register custom pricing for sagemaker completion model. ### Usage with LiteLLM Proxy Server
For cost per second pricing, you **just** need to register `input_cost_per_second`.
```python
# !pip install boto3
from litellm import completion, completion_cost
# `os` must be imported before os.environ can be used; the snippet
# previously relied on it without importing it.
import os

# AWS credentials and region for the SageMaker call below -- fill these in.
os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""
def test_completion_sagemaker():
    """Run one SageMaker completion with per-second pricing and print its cost.

    The call passes ``input_cost_per_second`` to ``completion``; the response
    is then handed to ``completion_cost`` and both values are printed.

    Raises:
        Exception: wraps any error raised while calling the model or
            computing the cost, with the original message embedded.
    """
    try:
        print("testing sagemaker")
        prompt = [{"role": "user", "content": "Hey, how's it going?"}]
        result = completion(
            model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4",
            messages=prompt,
            # Custom per-second price registered for this call.
            input_cost_per_second=0.000420,
        )
        # Place response assertions here if you want to validate the output.
        print(result)
        print(completion_cost(completion_response=result))
    except Exception as e:
        raise Exception(f"Error occurred: {e}")
```
### Usage with OpenAI Proxy Server
**Step 1: Add pricing to config.yaml** **Step 1: Add pricing to config.yaml**
```yaml ```yaml
@ -75,38 +45,7 @@ litellm /path/to/config.yaml
## Cost Per Token (e.g. Azure) ## Cost Per Token (e.g. Azure)
### Usage with LiteLLM Proxy Server
```python
# !pip install litellm
from litellm import completion, completion_cost
# `os` must be imported before os.environ can be used; the snippet
# previously relied on it without importing it.
import os

## set ENV variables
# Azure credentials, endpoint and API version for the call below -- fill these in.
os.environ["AZURE_API_KEY"] = ""
os.environ["AZURE_API_BASE"] = ""
os.environ["AZURE_API_VERSION"] = ""
def test_completion_azure_model():
    """Run one Azure completion with per-token pricing and print its cost.

    The call passes ``input_cost_per_token`` and ``output_cost_per_token`` to
    ``completion``; the response is then handed to ``completion_cost`` and
    both the response and the cost are printed.

    Raises:
        Exception: wraps any error raised while calling the model or
            computing the cost, with the original message embedded.
    """
    try:
        print("testing azure custom pricing")
        # azure call
        response = completion(
            model="azure/<your_deployment_name>",
            # The trailing comma here was missing, making the snippet a SyntaxError.
            messages=[{"content": "Hello, how are you?", "role": "user"}],
            input_cost_per_token=0.005,
            output_cost_per_token=1,
        )
        # Add any assertions here to check the response
        print(response)
        cost = completion_cost(completion_response=response)
        print(cost)
    except Exception as e:
        raise Exception(f"Error occurred: {e}")
test_completion_azure_model()
```
### Usage with OpenAI Proxy Server
```yaml ```yaml
model_list: model_list:

View file

@ -0,0 +1,65 @@
# Custom Pricing - SageMaker, Azure, etc
Register custom pricing for a SageMaker completion model.
For cost per second pricing, you **just** need to register `input_cost_per_second`.
```python
# !pip install boto3
from litellm import completion, completion_cost
# `os` must be imported before os.environ can be used; the snippet
# previously relied on it without importing it.
import os

# AWS credentials and region for the SageMaker call below -- fill these in.
os.environ["AWS_ACCESS_KEY_ID"] = ""
os.environ["AWS_SECRET_ACCESS_KEY"] = ""
os.environ["AWS_REGION_NAME"] = ""
def test_completion_sagemaker():
    """Run one SageMaker completion with per-second pricing and print its cost.

    The call passes ``input_cost_per_second`` to ``completion``; the response
    is then handed to ``completion_cost`` and both values are printed.

    Raises:
        Exception: wraps any error raised while calling the model or
            computing the cost, with the original message embedded.
    """
    try:
        print("testing sagemaker")
        prompt = [{"role": "user", "content": "Hey, how's it going?"}]
        result = completion(
            model="sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4",
            messages=prompt,
            # Custom per-second price registered for this call.
            input_cost_per_second=0.000420,
        )
        # Place response assertions here if you want to validate the output.
        print(result)
        print(completion_cost(completion_response=result))
    except Exception as e:
        raise Exception(f"Error occurred: {e}")
```
## Cost Per Token (e.g. Azure)
```python
# !pip install litellm
from litellm import completion, completion_cost
# `os` must be imported before os.environ can be used; the snippet
# previously relied on it without importing it.
import os

## set ENV variables
# Azure credentials, endpoint and API version for the call below -- fill these in.
os.environ["AZURE_API_KEY"] = ""
os.environ["AZURE_API_BASE"] = ""
os.environ["AZURE_API_VERSION"] = ""
def test_completion_azure_model():
    """Run one Azure completion with per-token pricing and print its cost.

    The call passes ``input_cost_per_token`` and ``output_cost_per_token`` to
    ``completion``; the response is then handed to ``completion_cost`` and
    both the response and the cost are printed.

    Raises:
        Exception: wraps any error raised while calling the model or
            computing the cost, with the original message embedded.
    """
    try:
        print("testing azure custom pricing")
        # azure call
        response = completion(
            model="azure/<your_deployment_name>",
            # The trailing comma here was missing, making the snippet a SyntaxError.
            messages=[{"content": "Hello, how are you?", "role": "user"}],
            input_cost_per_token=0.005,
            output_cost_per_token=1,
        )
        # Add any assertions here to check the response
        print(response)
        cost = completion_cost(completion_response=response)
        print(cost)
    except Exception as e:
        raise Exception(f"Error occurred: {e}")
test_completion_azure_model()
```

View file

@ -42,6 +42,7 @@ const sidebars = {
"proxy/configs", "proxy/configs",
"proxy/reliability", "proxy/reliability",
"proxy/cost_tracking", "proxy/cost_tracking",
"proxy/custom_pricing",
"proxy/self_serve", "proxy/self_serve",
"proxy/virtual_keys", "proxy/virtual_keys",
{ {
@ -183,7 +184,6 @@ const sidebars = {
"anthropic_completion" "anthropic_completion"
], ],
}, },
"proxy/custom_pricing",
{ {
type: "category", type: "category",
label: "Secret Manager", label: "Secret Manager",
@ -199,6 +199,8 @@ const sidebars = {
"routing", "routing",
"scheduler", "scheduler",
"set_keys", "set_keys",
"completion/token_usage",
"sdk_custom_pricing",
"budget_manager", "budget_manager",
"caching/all_caches", "caching/all_caches",
{ {
@ -208,7 +210,6 @@ const sidebars = {
}, },
], ],
}, },
"completion/token_usage",
"load_test", "load_test",
{ {
type: "category", type: "category",