diff --git a/docs/my-website/docs/proxy/caching.md b/docs/my-website/docs/proxy/caching.md
index 25fb4ce34..6769ec6c5 100644
--- a/docs/my-website/docs/proxy/caching.md
+++ b/docs/my-website/docs/proxy/caching.md
@@ -294,6 +294,11 @@ The proxy support 4 cache-controls:
**Turn off caching**
+Set `no-cache=True`; this will not return a cached response.
+
+
+
+
```python
import os
from openai import OpenAI
@@ -319,9 +324,81 @@ chat_completion = client.chat.completions.create(
}
)
```
+
+
+
+
+```shell
+curl http://localhost:4000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer sk-1234" \
+ -d '{
+ "model": "gpt-3.5-turbo",
+        "cache": {"no-cache": true},
+ "messages": [
+ {"role": "user", "content": "Say this is a test"}
+ ]
+ }'
+```
+
+
+
+
**Turn on caching**
+By default, caching is always on.
+
+
+
+
+```python
+import os
+from openai import OpenAI
+
+client = OpenAI(
+ # This is the default and can be omitted
+ api_key=os.environ.get("OPENAI_API_KEY"),
+ base_url="http://0.0.0.0:4000"
+)
+
+chat_completion = client.chat.completions.create(
+ messages=[
+ {
+ "role": "user",
+ "content": "Say this is a test",
+ }
+ ],
+ model="gpt-3.5-turbo"
+)
+```
+
+
+
+
+```shell
+curl http://localhost:4000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer sk-1234" \
+ -d '{
+ "model": "gpt-3.5-turbo",
+ "messages": [
+ {"role": "user", "content": "Say this is a test"}
+ ]
+ }'
+```
+
+
+
+
+
+**Set `ttl`**
+
+Set `ttl=600`; this caches the response for 10 minutes (600 seconds).
+
+
+
+
```python
import os
from openai import OpenAI
@@ -347,6 +424,35 @@ chat_completion = client.chat.completions.create(
}
)
```
+
+
+
+
+```shell
+curl http://localhost:4000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer sk-1234" \
+ -d '{
+ "model": "gpt-3.5-turbo",
+ "cache": {"ttl": 600},
+ "messages": [
+ {"role": "user", "content": "Say this is a test"}
+ ]
+ }'
+```
+
+
+
+
+
+
+
+**Set `s-maxage`**
+
+Set `s-maxage=600`; this will only return responses cached within the last 10 minutes.
+
+
+
```python
import os
@@ -373,6 +479,27 @@ chat_completion = client.chat.completions.create(
}
)
```
+
+
+
+
+```shell
+curl http://localhost:4000/v1/chat/completions \
+ -H "Content-Type: application/json" \
+ -H "Authorization: Bearer sk-1234" \
+ -d '{
+ "model": "gpt-3.5-turbo",
+ "cache": {"s-maxage": 600},
+ "messages": [
+ {"role": "user", "content": "Say this is a test"}
+ ]
+ }'
+```
+
+
+
+
+
### Turn on / off caching per Key.