diff --git a/docs/my-website/docs/proxy_server.md b/docs/my-website/docs/proxy_server.md
index aadc08542..ab976514e 100644
--- a/docs/my-website/docs/proxy_server.md
+++ b/docs/my-website/docs/proxy_server.md
@@ -151,6 +151,38 @@ $ litellm --model command-nightly
### Deploy Proxy
+
+
+
+
+**Step 1: Clone the repo**
+```shell
+git clone https://github.com/BerriAI/liteLLM-proxy.git
+```
+
+**Step 2: Put your API keys in .env**
+Copy `.env.template` to `.env` and fill in the relevant keys (e.g. `OPENAI_API_KEY="sk-.."`)
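+
+For example (a sketch, assuming the proxy reads a `.env` file from the repo root):
+```shell
+cp .env.template .env
+# open .env and set the keys you need, e.g. OPENAI_API_KEY
+```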
+
+**Step 3: Test your proxy**
+Start your proxy server:
+```shell
+cd liteLLM-proxy && python3 main.py
+```
+
+Make your first call:
+```python
+import openai
+
+openai.api_key = "sk-litellm-master-key"
+openai.api_base = "http://0.0.0.0:8080"
+
+response = openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "Hey"}])
+
+print(response)
+```
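+
+The same request can also be sent with curl (a sketch, assuming the proxy serves the standard OpenAI `/chat/completions` route at the `api_base` used above):
+```shell
+curl http://0.0.0.0:8080/chat/completions \
+  -H "Content-Type: application/json" \
+  -H "Authorization: Bearer sk-litellm-master-key" \
+  -d '{
+    "model": "gpt-3.5-turbo",
+    "messages": [{"role": "user", "content": "Hey"}]
+  }'
+```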
+
+
+
Deploy the proxy to https://api.litellm.ai
```shell
@@ -161,7 +193,6 @@ $ litellm --model claude-instant-1 --deploy
```
This will host a ChatCompletions API at: https://api.litellm.ai/44508ad4
-
#### Other supported models:
@@ -280,6 +311,8 @@ curl --location 'https://api.litellm.ai/44508ad4/chat/completions' \
```
+
+
## Setting api base, temperature, max tokens
diff --git a/litellm/main.py b/litellm/main.py
index 17522b1e7..1cb7e300f 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -29,22 +29,24 @@ from litellm.utils import (
get_api_key,
mock_completion_streaming_obj,
)
-from .llms import anthropic
-from .llms import together_ai
-from .llms import ai21
-from .llms import sagemaker
-from .llms import bedrock
-from .llms import huggingface_restapi
-from .llms import replicate
-from .llms import aleph_alpha
-from .llms import nlp_cloud
-from .llms import baseten
-from .llms import vllm
-from .llms import ollama
-from .llms import cohere
-from .llms import petals
-from .llms import oobabooga
-from .llms import palm
+from .llms import (
+ anthropic,
+ together_ai,
+ ai21,
+ sagemaker,
+ bedrock,
+ huggingface_restapi,
+ replicate,
+ aleph_alpha,
+ nlp_cloud,
+ baseten,
+ vllm,
+ ollama,
+ cohere,
+ petals,
+ oobabooga,
+ palm)
+from .llms.prompt_templates.factory import prompt_factory, custom_prompt
import tiktoken
from concurrent.futures import ThreadPoolExecutor
from typing import Callable, List, Optional, Dict
@@ -1040,13 +1042,25 @@ def completion(
response = model_response
elif custom_llm_provider == "ollama":
endpoint = (
- litellm.api_base if litellm.api_base is not None else api_base
+ litellm.api_base
+ or api_base
+ or "http://localhost:11434"
)
- prompt = " ".join([message["content"] for message in messages])
+ if model in litellm.custom_prompt_dict:
+ # check if the model has a registered custom prompt
+ model_prompt_details = litellm.custom_prompt_dict[model]
+ prompt = custom_prompt(
+ role_dict=model_prompt_details["roles"],
+ initial_prompt_value=model_prompt_details["initial_prompt_value"],
+ final_prompt_value=model_prompt_details["final_prompt_value"],
+ messages=messages
+ )
+ else:
+ prompt = prompt_factory(model=model, messages=messages)
## LOGGING
logging.pre_call(
- input=prompt, api_key=None, additional_args={"endpoint": endpoint}
+ input=prompt, api_key=None, additional_args={"endpoint": endpoint, "custom_prompt_dict": litellm.custom_prompt_dict}
)
if kwargs.get('acompletion', False) == True:
async_generator = ollama.async_get_ollama_response_stream(endpoint, model, prompt)
diff --git a/litellm/tests/test_ollama_local.py b/litellm/tests/test_ollama_local.py
index d7247d05a..a61b31830 100644
--- a/litellm/tests/test_ollama_local.py
+++ b/litellm/tests/test_ollama_local.py
@@ -1,5 +1,5 @@
-# ##### THESE TESTS CAN ONLY RUN LOCALLY WITH THE OLLAMA SERVER RUNNING ######
-# # https://ollama.ai/
+# # ##### THESE TESTS CAN ONLY RUN LOCALLY WITH THE OLLAMA SERVER RUNNING ######
+# # # https://ollama.ai/
# import sys, os
# import traceback
@@ -16,27 +16,55 @@
# user_message = "respond in 20 words. who are you?"
# messages = [{ "content": user_message,"role": "user"}]
-# def test_completion_ollama():
-# try:
-# response = completion(
-# model="ollama/llama2",
-# messages=messages,
-# api_base="http://localhost:11434"
-# )
-# print(response)
-# except Exception as e:
-# pytest.fail(f"Error occurred: {e}")
+# # def test_completion_ollama():
+# # try:
+# # response = completion(
+# # model="ollama/llama2",
+# # messages=messages,
+# # api_base="http://localhost:11434"
+# # )
+# # print(response)
+# # except Exception as e:
+# # pytest.fail(f"Error occurred: {e}")
# # test_completion_ollama()
-# def test_completion_ollama_stream():
+# # def test_completion_ollama_stream():
+# # user_message = "what is litellm?"
+# # messages = [{ "content": user_message,"role": "user"}]
+# # try:
+# # response = completion(
+# # model="ollama/llama2",
+# # messages=messages,
+# # stream=True
+# # )
+# # print(response)
+# # for chunk in response:
+# # print(chunk)
+# # # print(chunk['choices'][0]['delta'])
+
+# # except Exception as e:
+# # pytest.fail(f"Error occurred: {e}")
+
+# # test_completion_ollama_stream()
+
+
+# def test_completion_ollama_custom_prompt_template():
# user_message = "what is litellm?"
+# litellm.register_prompt_template(
+# model="llama2",
+# roles={
+# "system": {"pre_message": "System: "},
+# "user": {"pre_message": "User: "},
+# "assistant": {"pre_message": "Assistant: "}
+# }
+# )
# messages = [{ "content": user_message,"role": "user"}]
+# litellm.set_verbose = True
# try:
# response = completion(
# model="ollama/llama2",
# messages=messages,
-# api_base="http://localhost:11434",
# stream=True
# )
# print(response)
@@ -45,54 +73,54 @@
# # print(chunk['choices'][0]['delta'])
# except Exception as e:
+# traceback.print_exc()
# pytest.fail(f"Error occurred: {e}")
-# test_completion_ollama_stream()
+# test_completion_ollama_custom_prompt_template()
+
+# # async def test_completion_ollama_async_stream():
+# # user_message = "what is the weather"
+# # messages = [{ "content": user_message,"role": "user"}]
+# # try:
+# # response = await litellm.acompletion(
+# # model="ollama/llama2",
+# # messages=messages,
+# # api_base="http://localhost:11434",
+# # stream=True
+# # )
+# # async for chunk in response:
+# # print(chunk)
+
+# # # print(chunk['choices'][0]['delta'])
+
+# # except Exception as e:
+# # pytest.fail(f"Error occurred: {e}")
+
+# # # import asyncio
+# # # asyncio.run(test_completion_ollama_async_stream())
+
+# # def prepare_messages_for_chat(text: str) -> list:
+# # messages = [
+# # {"role": "user", "content": text},
+# # ]
+# # return messages
-# async def test_completion_ollama_async_stream():
-# user_message = "what is the weather"
-# messages = [{ "content": user_message,"role": "user"}]
-# try:
-# response = await litellm.acompletion(
-# model="ollama/llama2",
-# messages=messages,
-# api_base="http://localhost:11434",
-# stream=True
-# )
-# async for chunk in response:
-# print(chunk)
+# # async def ask_question():
+# # params = {
+# # "messages": prepare_messages_for_chat("What is litellm? tell me 10 things about it who is sihaan.write an essay"),
+# # "api_base": "http://localhost:11434",
+# # "model": "ollama/llama2",
+# # "stream": True,
+# # }
+# # response = await litellm.acompletion(**params)
+# # return response
-# # print(chunk['choices'][0]['delta'])
+# # async def main():
+# # response = await ask_question()
+# # async for chunk in response:
+# # print(chunk)
-# except Exception as e:
-# pytest.fail(f"Error occurred: {e}")
-
-# # import asyncio
-# # asyncio.run(test_completion_ollama_async_stream())
-
-# def prepare_messages_for_chat(text: str) -> list:
-# messages = [
-# {"role": "user", "content": text},
-# ]
-# return messages
-
-
-# async def ask_question():
-# params = {
-# "messages": prepare_messages_for_chat("What is litellm? tell me 10 things about it who is sihaan.write an essay"),
-# "api_base": "http://localhost:11434",
-# "model": "ollama/llama2",
-# "stream": True,
-# }
-# response = await litellm.acompletion(**params)
-# return response
-
-# async def main():
-# response = await ask_question()
-# async for chunk in response:
-# print(chunk)
-
-# if __name__ == "__main__":
-# import asyncio
-# asyncio.run(main())
+# # if __name__ == "__main__":
+# # import asyncio
+# # asyncio.run(main())
diff --git a/litellm/utils.py b/litellm/utils.py
index c3d629a92..573279b8e 100644
--- a/litellm/utils.py
+++ b/litellm/utils.py
@@ -2081,24 +2081,28 @@ def modify_integration(integration_name, integration_params):
# custom prompt helper function
def register_prompt_template(model: str, roles: dict, initial_prompt_value: str = "", final_prompt_value: str = ""):
"""
+    Format the OpenAI-style prompt to follow your custom format.
Example usage:
```
import litellm
litellm.register_prompt_template(
model="llama-2",
+        initial_prompt_value="You are a good assistant", # [OPTIONAL]
roles={
"system": {
- "pre_message": "[INST] <>\n",
- "post_message": "\n<>\n [/INST]\n"
+ "pre_message": "[INST] <>\n", # [OPTIONAL]
+ "post_message": "\n<>\n [/INST]\n" # [OPTIONAL]
},
- "user": { # follow this format https://github.com/facebookresearch/llama/blob/77062717054710e352a99add63d160274ce670c6/llama/generation.py#L348
- "pre_message": "[INST] ",
- "post_message": " [/INST]\n"
+ "user": {
+ "pre_message": "[INST] ", # [OPTIONAL]
+ "post_message": " [/INST]" # [OPTIONAL]
},
"assistant": {
- "post_message": "\n" # follows this - https://replicate.com/blog/how-to-prompt-llama
+ "pre_message": "\n" # [OPTIONAL]
+ "post_message": "\n" # [OPTIONAL]
}
-        }
+        },
+ final_prompt_value="Now answer as best you can:" # [OPTIONAL]
)
```
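+
+    With the template above, a single user message such as "what is litellm?" would, roughly,
+    be rendered as (assuming the pre/post message strings are simply concatenated around each
+    message's content):
+    ```
+    You are a good assistant[INST] what is litellm? [/INST]Now answer as best you can:
+    ```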
"""