fix(proxy/): remove cloned repo

This commit is contained in:
Krrish Dholakia 2023-10-12 21:46:18 -07:00
parent 8bb9be3c5a
commit f2eb1b4658
8 changed files with 0 additions and 775 deletions

@@ -1,47 +0,0 @@
# litellm-proxy
A local, fast, and lightweight **OpenAI-compatible server** to call 100+ LLM APIs.
## usage
```shell
$ pip install litellm
```
```shell
$ litellm --model ollama/codellama
#INFO: Ollama running on http://0.0.0.0:8000
```
## replace openai base
```python
import openai
openai.api_base = "http://0.0.0.0:8000"
print(openai.ChatCompletion.create(model="test", messages=[{"role":"user", "content":"Hey!"}]))
```
[**See how to call Huggingface, Bedrock, TogetherAI, Anthropic, etc.**](https://docs.litellm.ai/docs/proxy_server)
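For example, a minimal sketch for a hosted model (assumes the relevant provider key, e.g. `ANTHROPIC_API_KEY`, is exported in your environment; the key value below is a hypothetical placeholder):
```shell
$ export ANTHROPIC_API_KEY=sk-my-key   # hypothetical placeholder key
$ litellm --model claude-instant-1
# the proxy now serves claude-instant-1 on http://0.0.0.0:8000, same as above
```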
## configure proxy
To save API keys, change the model prompt, etc., you'll need to create a local instance of the proxy:
```shell
$ litellm --create-proxy
```
This will create a local project called `litellm-proxy` in your current directory, containing:
* **proxy_cli.py**: Runs the proxy
* **proxy_server.py**: Contains the API calling logic
- `/chat/completions`: receives the `openai.ChatCompletion.create` call.
- `/completions`: receives the `openai.Completion.create` call.
- `/models`: receives the `openai.Model.list()` call.
* **secrets.toml**: Stores your API keys, model configs, etc.
Run it with:
```shell
$ cd litellm-proxy
```
```shell
$ python proxy_cli.py --model ollama/llama # replace with your model name
```
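Once it's running, you can sanity-check the endpoint; a minimal sketch, assuming the default `0.0.0.0:8000` bind and the `/chat/completions` route listed above:
```shell
$ curl http://0.0.0.0:8000/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "Hey!"}]}'
```
Alternatively, `litellm --test` makes a similar test request for you.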

@@ -1 +0,0 @@
from . import *

@@ -1,85 +0,0 @@
{
"20231012182157625128": {
"pre_api_call": {
"model": "anthropic.claude-v2",
"messages": [
{
"role": "user",
"content": "what do you know?"
}
],
"optional_params": {
"temperature": 0.1,
"stream": true
},
"litellm_params": {
"return_async": false,
"api_key": null,
"force_timeout": 600,
"logger_fn": null,
"verbose": false,
"custom_llm_provider": "bedrock",
"api_base": null,
"litellm_call_id": "902640b5-4a26-4629-932d-35d6cf4e1635",
"model_alias_map": {},
"completion_call_id": null,
"metadata": null,
"stream_response": {}
},
"input": "\n\nHuman: \n\nHuman: what do you know?\n\nAssistant: ",
"api_key": "",
"additional_args": {
"complete_input_dict": "{\"prompt\": \"\\n\\nHuman: \\n\\nHuman: what do you know?\\n\\nAssistant: \", \"temperature\": 0.1, \"max_tokens_to_sample\": 256}"
},
"log_event_type": "pre_api_call"
},
"post_api_call": {
"model": "anthropic.claude-v2",
"messages": [
{
"role": "user",
"content": "what do you know?"
}
],
"optional_params": {
"temperature": 0.1,
"stream": true
},
"litellm_params": {
"return_async": false,
"api_key": null,
"force_timeout": 600,
"logger_fn": null,
"verbose": false,
"custom_llm_provider": "bedrock",
"api_base": null,
"litellm_call_id": "902640b5-4a26-4629-932d-35d6cf4e1635",
"model_alias_map": {},
"completion_call_id": null,
"metadata": null,
"stream_response": {}
},
"input": null,
"api_key": null,
"additional_args": {},
"log_event_type": "post_api_call",
"original_response": "<class 'generator'>",
"complete_streaming_response": {
"id": "chatcmpl-1757e5ea-71f2-44a2-9d8d-1ba8238a7c99",
"object": "chat.completion.chunk",
"created": 1697160117,
"model": "anthropic.claude-v2",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": " I'm Claude, an AI assistant created by Anthropic. I don't actually have general knowledge about the world. I'm an AI conversational model trained by Anthropic to be helpful, harmless, and honest."
},
"finish_reason": "stop_sequence"
}
]
}
}
}
}

@@ -1,8 +0,0 @@
{
"Oct-12-2023": {
"claude-2": {
"cost": 0.02365918,
"num_requests": 1
}
}
}

@@ -1,142 +0,0 @@
import click
import subprocess, traceback
import os, sys
import random
from dotenv import load_dotenv
load_dotenv()
from importlib import resources
import shutil
telemetry = None
def run_ollama_serve():
command = ['ollama', 'serve']
with open(os.devnull, 'w') as devnull:
process = subprocess.Popen(command, stdout=devnull, stderr=devnull)
def clone_subfolder(repo_url, subfolder, destination):
# Clone the full repo
repo_name = repo_url.split('/')[-1]
repo_master = os.path.join(destination, "repo_master")
subprocess.run(['git', 'clone', repo_url, repo_master])
# Move into the subfolder
subfolder_path = os.path.join(repo_master, subfolder)
# Copy subfolder to destination
for file_name in os.listdir(subfolder_path):
source = os.path.join(subfolder_path, file_name)
if os.path.isfile(source):
shutil.copy(source, destination)
else:
dest_path = os.path.join(destination, file_name)
shutil.copytree(source, dest_path)
# Remove cloned repo folder
subprocess.run(['rm', '-rf', os.path.join(destination, "repo_master")])
feature_telemetry(feature="create-proxy")
def is_port_in_use(port):
import socket
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
return s.connect_ex(('localhost', port)) == 0
@click.command()
@click.option('--host', default='0.0.0.0', help='Host for the server to listen on.')
@click.option('--port', default=8000, help='Port to bind the server to.')
@click.option('--api_base', default=None, help='API base URL.')
@click.option('--model', default=None, help='The model name to pass to litellm')
@click.option('--deploy', is_flag=True, type=bool, help='Get a deployed proxy endpoint - api.litellm.ai')
@click.option('--debug', default=False, is_flag=True, type=bool, help='To debug the input')
@click.option('--temperature', default=None, type=float, help='Set temperature for the model')
@click.option('--max_tokens', default=None, type=int, help='Set max tokens for the model')
@click.option('--drop_params', is_flag=True, help='Drop any unmapped params')
@click.option('--create_proxy', is_flag=True, help='Creates a local OpenAI-compatible server template')
@click.option('--add_function_to_prompt', is_flag=True, help='If a function is passed but unsupported by the provider, add it to the prompt instead')
@click.option('--max_budget', default=None, type=float, help='Set max budget for API calls - works for hosted models like OpenAI, TogetherAI, Anthropic, etc.')
@click.option('--telemetry', default=True, type=bool, help='Helps us know if people are using this feature. Turn this off by doing `--telemetry False`')
@click.option('--test', flag_value=True, help='Proxy chat completions URL to make a test request to')
@click.option('--local', is_flag=True, default=False, help='for local debugging')
@click.option('--cost', is_flag=True, default=False, help='for viewing cost logs')
def run_server(host, port, api_base, model, deploy, debug, temperature, max_tokens, drop_params, create_proxy, add_function_to_prompt, max_budget, telemetry, test, local, cost):
global feature_telemetry
if local:
from proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry
debug = True
else:
try:
from .proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry
except ImportError as e:
from proxy_server import app, initialize, deploy_proxy, print_cost_logs, usage_telemetry
feature_telemetry = usage_telemetry
if create_proxy == True:
repo_url = 'https://github.com/BerriAI/litellm'
subfolder = 'litellm/proxy'
destination = os.path.join(os.getcwd(), 'litellm-proxy')
clone_subfolder(repo_url, subfolder, destination)
return
if deploy == True:
print(f"\033[32mLiteLLM: Deploying your proxy to api.litellm.ai\033[0m\n")
print(f"\033[32mLiteLLM: Deploying proxy for model: {model}\033[0m\n")
url = deploy_proxy(model, api_base, debug, temperature, max_tokens, telemetry, deploy)
print(f"\033[32mLiteLLM: Deploy Successfull\033[0m\n")
print(f"\033[32mLiteLLM: Your deployed url: {url}\033[0m\n")
print(f"\033[32mLiteLLM: Test your URL using the following: \"litellm --test {url}\"\033[0m")
return
if model and "ollama" in model:
run_ollama_serve()
if cost == True:
print_cost_logs()
return
if test != False:
click.echo('LiteLLM: Making a test ChatCompletions request to your proxy')
import openai
if test == True: # flag value set
api_base = f"http://{host}:{port}"
else:
api_base = test
openai.api_base = api_base
openai.api_key = "temp-key"
print(openai.api_base)
response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages = [
{
"role": "user",
"content": "this is a test request, acknowledge that you got it"
}
])
click.echo(f'LiteLLM: response from proxy {response}')
click.echo('LiteLLM: making a streaming ChatCompletions request to your proxy')
response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages = [
{
"role": "user",
"content": "this is a test request, acknowledge that you got it"
}
],
stream=True,
)
for chunk in response:
click.echo(f'LiteLLM: streaming response from proxy {chunk}')
return
else:
initialize(model, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, add_function_to_prompt)
try:
import uvicorn
except ImportError:
raise ImportError("Uvicorn needs to be installed. Run `pip install uvicorn`")
print(f"\033[32mLiteLLM: Deployed Proxy Locally\033[0m\n")
print(f"\033[32mLiteLLM: Test your local endpoint with: \"litellm --test\" [In a new terminal tab]\033[0m\n")
print(f"\033[32mLiteLLM: Deploy your proxy using the following: \"litellm --model claude-instant-1 --deploy\" Get an https://api.litellm.ai/chat/completions endpoint \033[0m\n")
if port == 8000 and is_port_in_use(port):
port = random.randint(1024, 49152)
uvicorn.run(app, host=host, port=port)
if __name__ == "__main__":
run_server()

@@ -1,461 +0,0 @@
import sys, os, platform, time, copy
import threading
import shutil, random, traceback
# sys.path.insert(
# 0, os.path.abspath("../..")
# ) # Adds the parent directory to the system path - for litellm local dev
try:
import uvicorn
import fastapi
import tomli as tomllib
import appdirs
except ImportError:
import subprocess
import sys
subprocess.check_call([sys.executable, "-m", "pip", "install", "uvicorn", "fastapi", "tomli", "appdirs"])
import uvicorn
import fastapi
import tomli as tomllib
import appdirs
import random
list_of_messages = [
"'The thing I wish you improved is...'",
"'A feature I really want is...'",
"'The worst thing about this product is...'",
"'This product would be better if...'",
"'I don't like how this works...'",
"'It would help me if you could add...'",
"'This feature doesn't meet my needs because...'",
"'I get frustrated when the product...'",
]
def generate_feedback_box():
box_width = 60
# Select a random message
message = random.choice(list_of_messages)
print()
print('\033[1;37m' + '#' + '-'*box_width + '#\033[0m')
print('\033[1;37m' + '#' + ' '*box_width + '#\033[0m')
print('\033[1;37m' + '# {:^59} #\033[0m'.format(message))
print('\033[1;37m' + '# {:^59} #\033[0m'.format('https://github.com/BerriAI/litellm/issues/new'))
print('\033[1;37m' + '#' + ' '*box_width + '#\033[0m')
print('\033[1;37m' + '#' + '-'*box_width + '#\033[0m')
print()
print(' Thank you for using LiteLLM! - Krrish & Ishaan')
print()
print()
generate_feedback_box()
print()
print("\033[1;31mGive Feedback / Get Help: https://github.com/BerriAI/litellm/issues/new\033[0m")
print()
print("\033[1;34mDocs: https://docs.litellm.ai/docs/proxy_server\033[0m")
print()
import litellm
from fastapi import FastAPI, Request
from fastapi.routing import APIRouter
from fastapi.responses import StreamingResponse, FileResponse
import json
import logging
app = FastAPI()
router = APIRouter()
user_api_base = None
user_model = None
user_debug = False
user_max_tokens = None
user_temperature = None
user_telemetry = True
user_config = None
config_filename = "secrets.toml"
config_dir = os.getcwd()
user_config_path = os.path.join(config_dir, config_filename)
log_file = 'api_log.json'
#### HELPER FUNCTIONS ####
def print_verbose(print_statement):
global user_debug
if user_debug:
print(print_statement)
def usage_telemetry(feature: str): # helps us know if people are using this feature. Add `--telemetry False` to your cli call to turn this off
print(f"user_telemetry: {user_telemetry}")
if user_telemetry:
print(f"feature telemetry: {feature}")
data = {
"feature": feature # "local_proxy_server"
}
threading.Thread(target=litellm.utils.litellm_telemetry, args=(data,), daemon=True).start()
def load_config():
try:
global user_config, user_api_base, user_max_tokens, user_temperature, user_model
# load the user config from secrets.toml (read in binary mode for tomllib)
with open(user_config_path, "rb") as f:
user_config = tomllib.load(f)
## load keys
if "keys" in user_config:
for key in user_config["keys"]:
if key == "HUGGINGFACE_API_KEY":
litellm.huggingface_key = user_config["keys"][key]
elif key == "OPENAI_API_KEY":
litellm.openai_key = user_config["keys"][key]
elif key == "TOGETHERAI_API_KEY":
litellm.togetherai_api_key = user_config["keys"][key]
elif key == "NLP_CLOUD_API_KEY":
litellm.nlp_cloud_key = user_config["keys"][key]
elif key == "ANTHROPIC_API_KEY":
litellm.anthropic_key = user_config["keys"][key]
elif key == "REPLICATE_API_KEY":
litellm.replicate_key = user_config["keys"][key]
elif key == "AWS_ACCESS_KEY_ID":
os.environ["AWS_ACCESS_KEY_ID"] = user_config["keys"][key]
elif key == "AWS_SECRET_ACCESS_KEY":
os.environ["AWS_SECRET_ACCESS_KEY"] = user_config["keys"][key]
## settings
litellm.add_function_to_prompt = user_config["general"].get("add_function_to_prompt", True) # by default add function to prompt if unsupported by provider
litellm.drop_params = user_config["general"].get("drop_params", True) # by default drop params if unsupported by provider
## load model config - to set this run `litellm --config`
model_config = None
if user_model in user_config["model"]:
model_config = user_config["model"][user_model]
print_verbose(f"user_config: {user_config}")
print_verbose(f"model_config: {model_config}")
if model_config is None:
return
user_model = model_config["model_name"] # raise an error if this isn't set when user runs either `litellm --model local_model` or `litellm --model hosted_model`
print_verbose(f"user_model: {user_model}")
user_max_tokens = model_config.get("max_tokens", None)
user_temperature = model_config.get("temperature", None)
user_api_base = model_config.get("api_base", None)
## custom prompt template
if "prompt_template" in model_config:
model_prompt_template = model_config["prompt_template"]
if len(model_prompt_template.keys()) > 0: # if user has initialized this at all
litellm.register_prompt_template(
model=user_model,
initial_prompt_value=model_prompt_template.get("MODEL_PRE_PROMPT", ""),
roles={
"system": {
"pre_message": model_prompt_template.get("MODEL_SYSTEM_MESSAGE_START_TOKEN", ""),
"post_message": model_prompt_template.get("MODEL_SYSTEM_MESSAGE_END_TOKEN", ""),
},
"user": {
"pre_message": model_prompt_template.get("MODEL_USER_MESSAGE_START_TOKEN", ""),
"post_message": model_prompt_template.get("MODEL_USER_MESSAGE_END_TOKEN", ""),
},
"assistant": {
"pre_message": model_prompt_template.get("MODEL_ASSISTANT_MESSAGE_START_TOKEN", ""),
"post_message": model_prompt_template.get("MODEL_ASSISTANT_MESSAGE_END_TOKEN", ""),
}
},
final_prompt_value=model_prompt_template.get("MODEL_POST_PROMPT", ""),
)
except Exception as e:
traceback.print_exc()
def initialize(model, api_base, debug, temperature, max_tokens, max_budget, telemetry, drop_params, add_function_to_prompt):
global user_model, user_api_base, user_debug, user_max_tokens, user_temperature, user_telemetry
user_model = model
user_debug = debug
load_config()
user_api_base = api_base
user_max_tokens = max_tokens
user_temperature = temperature
user_telemetry = telemetry
usage_telemetry(feature="local_proxy_server")
if drop_params == True:
litellm.drop_params = True
if add_function_to_prompt == True:
litellm.add_function_to_prompt = True
if max_budget:
litellm.max_budget = max_budget
def deploy_proxy(model, api_base, debug, temperature, max_tokens, telemetry, deploy):
import requests
# Load .env file
# Prepare data for posting
data = {
"model": model,
"api_base": api_base,
"temperature": temperature,
"max_tokens": max_tokens,
}
# print(data)
# Make post request to the url
url = "https://litellm-api.onrender.com/deploy"
# url = "http://0.0.0.0:4000/deploy"
with open(".env", "w") as env_file:
for row in data:
env_file.write(f"{row.upper()}='{data[row]}'\n")
env_file.write("\n\n")
for key in os.environ:
value = os.environ[key]
env_file.write(f"{key}='{value}'\n")
# env_file.write(str(os.environ))
files = {"file": open(".env", "rb")}
# print(files)
response = requests.post(url, data=data, files=files)
# print(response)
# Check the status of the request
if response.status_code != 200:
return f"Request to url: {url} failed with status: {response.status_code}"
# Reading the response
response_data = response.json()
# print(response_data)
url = response_data["url"]
# # Do something with response_data
return url
# for streaming
def data_generator(response):
print_verbose("inside generator")
for chunk in response:
print_verbose(f"returned chunk: {chunk}")
yield f"data: {json.dumps(chunk)}\n\n"
def track_cost_callback(
kwargs, # kwargs to completion
completion_response, # response from completion
start_time, end_time # start/end time
):
# track cost like this
# {
# "Oct12": {
# "gpt-4": 10,
# "claude-2": 12.01,
# },
# "Oct 15": {
# "ollama/llama2": 0.0,
# "gpt2": 1.2
# }
# }
try:
# for streaming responses
if "complete_streaming_response" in kwargs:
# for tracking streaming cost we pass the "messages" and the output_text to litellm.completion_cost
completion_response=kwargs["complete_streaming_response"]
input_text = kwargs["messages"]
output_text = completion_response["choices"][0]["message"]["content"]
response_cost = litellm.completion_cost(
model = kwargs["model"],
messages = input_text,
completion=output_text
)
model = kwargs['model']
print("streaming response_cost", response_cost)
# for non streaming responses
else:
# we pass the completion_response obj
if kwargs["stream"] != True:
response_cost = litellm.completion_cost(completion_response=completion_response)
print("regular response_cost", response_cost)
model = completion_response["model"]
# read/write from json for storing daily model costs
cost_data = {}
try:
with open("costs.json") as f:
cost_data = json.load(f)
except FileNotFoundError:
cost_data = {}
import datetime
date = datetime.datetime.now().strftime("%b-%d-%Y")
if date not in cost_data:
cost_data[date] = {}
if kwargs["model"] in cost_data[date]:
cost_data[date][kwargs["model"]]["cost"] += response_cost
cost_data[date][kwargs["model"]]["num_requests"] += 1
else:
cost_data[date][kwargs["model"]] = {
"cost": response_cost,
"num_requests": 1
}
with open("costs.json", "w") as f:
json.dump(cost_data, f, indent=2)
except:
pass
def logger(
kwargs, # kwargs to completion
completion_response=None, # response from completion
start_time=None,
end_time=None # start/end time
):
log_event_type = kwargs['log_event_type']
print(f"REACHES LOGGER: {log_event_type}")
try:
if log_event_type == 'pre_api_call':
inference_params = copy.deepcopy(kwargs)
timestamp = inference_params.pop('start_time')
dt_key = timestamp.strftime("%Y%m%d%H%M%S%f")[:23]
log_data = {
dt_key: {
'pre_api_call': inference_params
}
}
try:
with open(log_file, 'r') as f:
existing_data = json.load(f)
except FileNotFoundError:
existing_data = {}
existing_data.update(log_data)
with open(log_file, 'w') as f:
json.dump(existing_data, f, indent=2)
elif log_event_type == 'post_api_call':
print(f"post api call kwargs: {kwargs}")
if "stream" not in kwargs["optional_params"] or kwargs["optional_params"]["stream"] is False or kwargs.get("complete_streaming_response", False):
inference_params = copy.deepcopy(kwargs)
timestamp = inference_params.pop('start_time')
dt_key = timestamp.strftime("%Y%m%d%H%M%S%f")[:23]
with open(log_file, 'r') as f:
existing_data = json.load(f)
existing_data[dt_key]['post_api_call'] = inference_params
with open(log_file, 'w') as f:
json.dump(existing_data, f, indent=2)
except:
traceback.print_exc()
litellm.input_callback = [logger]
litellm.success_callback = [logger]
litellm.failure_callback = [logger]
def litellm_completion(data, type):
try:
if user_model:
data["model"] = user_model
# override with user settings
if user_temperature:
data["temperature"] = user_temperature
if user_max_tokens:
data["max_tokens"] = user_max_tokens
if user_api_base:
data["api_base"] = user_api_base
if type == "completion":
response = litellm.text_completion(**data)
elif type == "chat_completion":
response = litellm.completion(**data)
if 'stream' in data and data['stream'] == True: # use generate_responses to stream responses
return StreamingResponse(data_generator(response), media_type='text/event-stream')
print_verbose(f"response: {response}")
return response
except Exception as e:
if "Invalid response object from API" in str(e):
completion_call_details = {}
if user_model:
completion_call_details["model"] = user_model
else:
completion_call_details["model"] = data['model']
if user_api_base:
completion_call_details["api_base"] = user_api_base
else:
completion_call_details["api_base"] = None
print(f"\033[1;31mLiteLLM.Exception: Invalid API Call. Call details: Model: \033[1;37m{completion_call_details['model']}\033[1;31m; LLM Provider: \033[1;37m{e.llm_provider}\033[1;31m; Custom API Base - \033[1;37m{completion_call_details['api_base']}\033[1;31m\033[0m")
if completion_call_details["api_base"] == "http://localhost:11434":
print()
print("Trying to call ollama? Try `litellm --model ollama/llama2 --api_base http://localhost:11434`")
print()
else:
print(f"\033[1;31mLiteLLM.Exception: {str(e)}\033[0m")
return {"message": "An error occurred"}, 500
#### API ENDPOINTS ####
@router.get("/models") # if project requires model list
def model_list():
if user_model != None:
return dict(
data=[{"id": user_model, "object": "model", "created": 1677610602, "owned_by": "openai"}],
object="list",
)
else:
all_models = litellm.model_list
return dict(
data = [{"id": model, "object": "model", "created": 1677610602, "owned_by": "openai"} for model in all_models],
object="list",
)
@router.post("/completions")
async def completion(request: Request):
data = await request.json()
return litellm_completion(data=data, type="completion")
@router.post("/chat/completions")
async def chat_completion(request: Request):
data = await request.json()
print(f"data passed in: {data}")
response = litellm_completion(data, type="chat_completion")
return response
# V1 Endpoints - some apps expect a v1 endpoint - these call the regular function
@router.post("/v1/completions")
async def v1_completion(request: Request):
data = await request.json()
return litellm_completion(data=data, type="completion")
@router.post("/v1/chat/completions")
async def v1_chat_completion(request: Request):
data = await request.json()
print_verbose(f"data passed in: {data}")
response = litellm_completion(data, type="chat_completion")
return response
def print_cost_logs():
with open('costs.json', 'r') as f:
# print this in green
print("\033[1;32m")
print(f.read())
print("\033[0m")
return
@router.get("/ollama_logs")
async def retrieve_server_log(request: Request):
filepath = os.path.expanduser('~/.ollama/logs/server.log')
return FileResponse(filepath)
@router.get("/")
async def home(request: Request):
return "LiteLLM: RUNNING"
app.include_router(router)

@@ -1,31 +0,0 @@
[keys]
# HUGGINGFACE_API_KEY="" # Uncomment to save your Hugging Face API key
# OPENAI_API_KEY="" # Uncomment to save your OpenAI API Key
# TOGETHERAI_API_KEY="" # Uncomment to save your TogetherAI API key
# NLP_CLOUD_API_KEY="" # Uncomment to save your NLP Cloud API key
# ANTHROPIC_API_KEY="" # Uncomment to save your Anthropic API key
# REPLICATE_API_KEY="" # Uncomment to save your Replicate API key
# AWS_ACCESS_KEY_ID = "" # Uncomment to save your Bedrock/Sagemaker access keys
# AWS_SECRET_ACCESS_KEY = "" # Uncomment to save your Bedrock/Sagemaker access keys
[general]
# add_function_to_prompt = true # e.g. Ollama doesn't support functions, so add them to the prompt instead
# drop_params = true # drop any params not supported by the provider (e.g. Ollama)
[model."ollama/llama2"] # run via `litellm --model ollama/llama2`
# max_tokens = "" # set max tokens for the model
# temperature = "" # set temperature for the model
# api_base = "" # set a custom api base for the model
[model."ollama/llama2".prompt_template] # [OPTIONAL] LiteLLM can automatically formats the prompt - docs: https://docs.litellm.ai/docs/completion/prompt_formatting
# MODEL_SYSTEM_MESSAGE_START_TOKEN = "[INST] <<SYS>>\n" # This does not need to be a token, can be any string
# MODEL_SYSTEM_MESSAGE_END_TOKEN = "\n<</SYS>>\n [/INST]\n" # This does not need to be a token, can be any string
# MODEL_USER_MESSAGE_START_TOKEN = "[INST] " # This does not need to be a token, can be any string
# MODEL_USER_MESSAGE_END_TOKEN = " [/INST]\n" # Applies only to user messages. Can be any string.
# MODEL_ASSISTANT_MESSAGE_START_TOKEN = "" # Applies only to assistant messages. Can be any string.
# MODEL_ASSISTANT_MESSAGE_END_TOKEN = "\n" # Applies only to assistant messages. Can be any string.
# MODEL_PRE_PROMPT = "You are a good bot" # Applied at the start of the prompt
# MODEL_POST_PROMPT = "Now answer as best as you can" # Applied at the end of the prompt
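For illustration, a filled-in sketch of a `secrets.toml` for a local Ollama model; the key and model values below are hypothetical placeholders, not shipped defaults:
```toml
[keys]
# OPENAI_API_KEY = "sk-..." # hypothetical placeholder; only needed for hosted models

[general]
add_function_to_prompt = true # Ollama doesn't support functions, so add them to the prompt instead
drop_params = true            # drop any params not supported by the provider

[model."ollama/llama2"]
model_name = "ollama/llama2"        # required by load_config in proxy_server.py
max_tokens = 256                    # hypothetical value
temperature = 0.1                   # hypothetical value
api_base = "http://localhost:11434" # default local Ollama endpoint

[model."ollama/llama2".prompt_template]
MODEL_SYSTEM_MESSAGE_START_TOKEN = "[INST] <<SYS>>\n"
MODEL_SYSTEM_MESSAGE_END_TOKEN = "\n<</SYS>>\n [/INST]\n"
MODEL_USER_MESSAGE_START_TOKEN = "[INST] "
MODEL_USER_MESSAGE_END_TOKEN = " [/INST]\n"
MODEL_PRE_PROMPT = "You are a good bot"
MODEL_POST_PROMPT = "Now answer as best as you can"
```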