litellm-mirror/litellm/proxy/proxy_config.yaml
2024-02-06 15:17:40 -08:00

104 lines
3.7 KiB
YAML

model_list:
- model_name: azure-gpt-3.5
litellm_params:
model: azure/chatgpt-v-2
api_base: os.environ/AZURE_API_BASE
api_key: os.environ/AZURE_API_KEY
api_version: "2023-07-01-preview"
model_info:
mode: chat
input_cost_per_token: 0.0.00006
output_cost_per_token: 0.00003
max_tokens: 4096
base_model: gpt-3.5-turbo
- model_name: gpt-4
litellm_params:
model: azure/chatgpt-v-2
api_base: https://openai-gpt-4-test-v-1.openai.azure.com/
api_version: "2023-05-15"
api_key: os.environ/AZURE_API_KEY # The `os.environ/` prefix tells litellm to read this from the env. See https://docs.litellm.ai/docs/simple_proxy#load-api-keys-from-vault
- model_name: gpt-vision
litellm_params:
model: azure/gpt-4-vision
base_url: https://gpt-4-vision-resource.openai.azure.com/openai/deployments/gpt-4-vision/extensions
api_key: os.environ/AZURE_VISION_API_KEY
api_version: "2023-09-01-preview"
dataSources:
- type: AzureComputerVision
parameters:
endpoint: os.environ/AZURE_VISION_ENHANCE_ENDPOINT
key: os.environ/AZURE_VISION_ENHANCE_KEY
- model_name: BEDROCK_GROUP
litellm_params:
model: bedrock/cohere.command-text-v14
- model_name: tg-ai
litellm_params:
model: together_ai/mistralai/Mistral-7B-Instruct-v0.1
- model_name: sagemaker
litellm_params:
model: sagemaker/berri-benchmarking-Llama-2-70b-chat-hf-4
- model_name: openai-gpt-3.5
litellm_params:
model: gpt-3.5-turbo
api_key: os.environ/OPENAI_API_KEY
model_info:
mode: chat
- model_name: azure-cloudflare
litellm_params:
model: azure/chatgpt-v-2
api_base: https://gateway.ai.cloudflare.com/v1/0399b10e77ac6668c80404a5ff49eb37/litellm-test/azure-openai/openai-gpt-4-test-v-1
api_key: os.environ/AZURE_API_KEY
api_version: "2023-07-01-preview"
- model_name: azure-embedding-model
litellm_params:
model: azure/azure-embedding-model
api_base: os.environ/AZURE_API_BASE
api_key: os.environ/AZURE_API_KEY
api_version: "2023-07-01-preview"
model_info:
mode: embedding
base_model: text-embedding-ada-002
- model_name: text-embedding-ada-002
litellm_params:
model: text-embedding-ada-002
api_key: os.environ/OPENAI_API_KEY
model_info:
mode: embedding
litellm_settings:
fallbacks: [{"openai-gpt-3.5": ["azure-gpt-3.5"]}]
success_callback: ['langfuse']
max_budget: 10 # global budget for proxy
max_user_budget: 0.0001
budget_duration: 30d # global budget duration, will reset after 30d
default_key_generate_params:
max_budget: 1.5000
models: ["azure-gpt-3.5"]
duration: None
cache: True # set cache responses to True
cache_params:
type: "redis-semantic"
similarity_threshold: 0.8
redis_semantic_cache_embedding_model: azure-embedding-model
upperbound_key_generate_params:
max_budget: 100
duration: "30d"
# setting callback class
# callbacks: custom_callbacks.proxy_handler_instance # sets litellm.callbacks = [proxy_handler_instance]
general_settings:
master_key: sk-1234
alerting: ["slack"]
alerting_threshold: 10 # sends alerts if requests hang for 2 seconds
# database_type: "dynamo_db"
# database_args: { # 👈 all args - https://github.com/BerriAI/litellm/blob/befbcbb7ac8f59835ce47415c128decf37aac328/litellm/proxy/_types.py#L190
# "billing_mode": "PAY_PER_REQUEST",
# "region_name": "us-west-2",
# "ssl_verify": False
# }
environment_variables:
# otel: True # OpenTelemetry Logger
# master_key: sk-1234 # [OPTIONAL] Only use this if you to require all calls to contain this key (Authorization: Bearer sk-1234)