From fd4157cf7130f3ec59e880c19448fac5acdbcb68 Mon Sep 17 00:00:00 2001
From: Ishaan Jaff
Date: Sat, 31 Aug 2024 14:57:12 -0700
Subject: [PATCH] docs add cerebras

---
 docs/my-website/docs/providers/cerebras.md | 145 +++++++++++++++++++++
 docs/my-website/sidebars.js                |   1 +
 2 files changed, 146 insertions(+)
 create mode 100644 docs/my-website/docs/providers/cerebras.md

diff --git a/docs/my-website/docs/providers/cerebras.md b/docs/my-website/docs/providers/cerebras.md
new file mode 100644
index 000000000..4fabeb31c
--- /dev/null
+++ b/docs/my-website/docs/providers/cerebras.md
@@ -0,0 +1,145 @@
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
+# Cerebras
+https://inference-docs.cerebras.ai/api-reference/chat-completions
+
+:::tip
+
+**We support ALL Cerebras models, just set `model=cerebras/<any-model-on-cerebras>` as a prefix when sending litellm requests**
+
+:::
+
+## API Key
+```python
+# env variable
+os.environ['CEREBRAS_API_KEY']
+```
+
+## Sample Usage
+```python
+from litellm import completion
+import os
+
+os.environ['CEREBRAS_API_KEY'] = ""
+response = completion(
+    model="cerebras/meta/llama3-70b-instruct",
+    messages=[
+        {
+            "role": "user",
+            "content": "What's the weather like in Boston today in Fahrenheit?",
+        }
+    ],
+    max_tokens=10,
+    response_format={"type": "json_object"},
+    seed=123,
+    stop=["\n\n"],
+    temperature=0.2,
+    top_p=0.9,
+    tool_choice="auto",
+    tools=[],
+    user="user",
+)
+print(response)
+```
+
+## Sample Usage - Streaming
+```python
+from litellm import completion
+import os
+
+os.environ['CEREBRAS_API_KEY'] = ""
+response = completion(
+    model="cerebras/meta/llama3-70b-instruct",
+    messages=[
+        {
+            "role": "user",
+            "content": "What's the weather like in Boston today in Fahrenheit?",
+        }
+    ],
+    stream=True,
+    max_tokens=10,
+    response_format={"type": "json_object"},
+    seed=123,
+    stop=["\n\n"],
+    temperature=0.2,
+    top_p=0.9,
+    tool_choice="auto",
+    tools=[],
+    user="user",
+)
+
+for chunk in response:
+    print(chunk)
+```
+
+
+## Usage with LiteLLM Proxy Server
+
+Here's how to call a Cerebras model with the LiteLLM Proxy Server:
+
+1. Modify the config.yaml
+
+   ```yaml
+   model_list:
+     - model_name: my-model
+       litellm_params:
+         model: cerebras/<your-model-name>  # add cerebras/ prefix to route as Cerebras provider
+         api_key: api-key                   # API key for your Cerebras model
+   ```
+
+
+2. Start the proxy
+
+   ```bash
+   $ litellm --config /path/to/config.yaml
+   ```
+
+3. Send Request to LiteLLM Proxy Server
+
+<Tabs>
+
+<TabItem value="openai" label="OpenAI Python v1.0.0+">
+
+```python
+import openai
+client = openai.OpenAI(
+    api_key="sk-1234",              # pass litellm proxy key, if you're using virtual keys
+    base_url="http://0.0.0.0:4000"  # litellm-proxy base url
+)
+
+response = client.chat.completions.create(
+    model="my-model",
+    messages=[
+        {
+            "role": "user",
+            "content": "what llm are you"
+        }
+    ],
+)
+
+print(response)
+```
+
+</TabItem>
+
+<TabItem value="curl" label="curl">
+
+```shell
+curl --location 'http://0.0.0.0:4000/chat/completions' \
+--header 'Authorization: Bearer sk-1234' \
+--header 'Content-Type: application/json' \
+--data '{
+    "model": "my-model",
+    "messages": [
+        {
+            "role": "user",
+            "content": "what llm are you"
+        }
+    ]
+}'
+```
+
+</TabItem>
+
+</Tabs>
diff --git a/docs/my-website/sidebars.js b/docs/my-website/sidebars.js
index 59db4c363..048b04171 100644
--- a/docs/my-website/sidebars.js
+++ b/docs/my-website/sidebars.js
@@ -138,6 +138,7 @@ const sidebars = {
         "providers/watsonx",
         "providers/predibase",
         "providers/nvidia_nim",
+        "providers/cerebras",
         "providers/volcano",
         "providers/triton-inference-server",
         "providers/ollama",