Merge pull request #5079 from BerriAI/litellm_add_pydantic_model_support

feat(utils.py): support passing response_format as pydantic model
Krish Dholakia 2024-08-07 14:43:05 -07:00 committed by GitHub
commit 2e434d56e3
10 changed files with 353 additions and 100 deletions


@@ -47,7 +47,7 @@ jobs:
pip install opentelemetry-api==1.25.0
pip install opentelemetry-sdk==1.25.0
pip install opentelemetry-exporter-otlp==1.25.0
pip install openai==1.34.0
pip install openai==1.40.0
pip install prisma==0.11.0
pip install "detect_secrets==1.5.0"
pip install "httpx==0.24.1"
@@ -165,7 +165,7 @@ jobs:
pip install "pytest==7.3.1"
pip install "pytest-asyncio==0.21.1"
pip install aiohttp
pip install openai
pip install "openai==1.40.0"
python -m pip install --upgrade pip
python -m pip install -r .circleci/requirements.txt
pip install "pytest==7.3.1"


@@ -69,13 +69,10 @@ To use Structured Outputs, simply specify
response_format: { "type": "json_schema", "json_schema": … , "strict": true }
```
Works for OpenAI models
:::info
Support for passing in a pydantic object to litellm sdk will be [coming soon](https://github.com/BerriAI/litellm/issues/5074#issuecomment-2272355842)
:::
Works for:
- OpenAI models
- Google AI Studio - Gemini models
- Vertex AI models (Gemini + Anthropic)
<Tabs>
<TabItem value="sdk" label="SDK">
@@ -89,36 +86,15 @@ os.environ["OPENAI_API_KEY"] = ""
messages = [{"role": "user", "content": "List 5 cookie recipes"}]
class CalendarEvent(BaseModel):
name: str
date: str
participants: list[str]
resp = completion(
model="gpt-4o-2024-08-06",
messages=messages,
response_format={
"type": "json_schema",
"json_schema": {
"name": "math_reasoning",
"schema": {
"type": "object",
"properties": {
"steps": {
"type": "array",
"items": {
"type": "object",
"properties": {
"explanation": { "type": "string" },
"output": { "type": "string" }
},
"required": ["explanation", "output"],
"additionalProperties": False
}
},
"final_answer": { "type": "string" }
},
"required": ["steps", "final_answer"],
"additionalProperties": False
},
"strict": True
},
}
response_format=CalendarEvent
)
print("Received={}".format(resp))
@@ -229,15 +205,15 @@ curl -X POST 'http://0.0.0.0:4000/v1/chat/completions' \
## Validate JSON Schema
:::info
Support for doing this in the openai 'json_schema' format will be [coming soon](https://github.com/BerriAI/litellm/issues/5074#issuecomment-2272355842)
Not all Vertex AI models support passing a `json_schema` to them (e.g. `gemini-1.5-flash`). To solve this, LiteLLM supports client-side validation of the JSON schema.
:::
```
litellm.enable_json_schema_validation=True
```
If `litellm.enable_json_schema_validation=True` is set, LiteLLM will validate the json response using `jsonvalidator`.
For VertexAI models, LiteLLM supports passing the `response_schema` and validating the JSON output.
This works across Gemini (`vertex_ai_beta/`) + Anthropic (`vertex_ai/`) models.
[**See Code**](https://github.com/BerriAI/litellm/blob/671d8ac496b6229970c7f2a3bdedd6cb84f0746b/litellm/litellm_core_utils/json_validation_rule.py#L4)
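For illustration only (not part of this diff), client-side schema validation amounts to roughly the following sketch. It assumes the `jsonschema` package; LiteLLM's actual helper is the one linked above and may differ in details.

```python
# Minimal sketch of client-side JSON schema validation (assumes `jsonschema`).
# LiteLLM's actual helper is linked above and may differ.
import json
from jsonschema import ValidationError, validate

def validate_llm_response(schema: dict, response: str) -> None:
    try:
        validate(instance=json.loads(response), schema=schema)
    except (json.JSONDecodeError, ValidationError) as e:
        raise ValueError(f"LLM response failed schema validation: {e}") from e
```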
<Tabs>
@@ -245,33 +221,28 @@ This works across Gemini (`vertex_ai_beta/`) + Anthropic (`vertex_ai/`) models.
```python
# !gcloud auth application-default login - run this to add vertex credentials to your env
import litellm, os
from litellm import completion
from pydantic import BaseModel
messages = [{"role": "user", "content": "List 5 cookie recipes"}]
response_schema = {
"type": "array",
"items": {
"type": "object",
"properties": {
"recipe_name": {
"type": "string",
},
},
"required": ["recipe_name"],
},
}
messages=[
{"role": "system", "content": "Extract the event information."},
{"role": "user", "content": "Alice and Bob are going to a science fair on Friday."},
]
litellm.enable_json_schema_validation = True
litellm.set_verbose = True # see the raw request made by litellm
class CalendarEvent(BaseModel):
name: str
date: str
participants: list[str]
resp = completion(
model="vertex_ai_beta/gemini-1.5-pro",
model="gemini/gemini-1.5-pro",
messages=messages,
response_format={
"type": "json_object",
"response_schema": response_schema,
"enforce_validation": True, # client-side json schema validation
},
vertex_location="us-east5",
response_format=CalendarEvent,
)
print("Received={}".format(resp))
@@ -279,26 +250,63 @@ print("Received={}".format(resp))
</TabItem>
<TabItem value="proxy" label="PROXY">
1. Create config.yaml
```yaml
model_list:
- model_name: "gemini-1.5-flash"
litellm_params:
model: "gemini/gemini-1.5-flash"
api_key: os.environ/GEMINI_API_KEY
litellm_settings:
enable_json_schema_validation: True
```
2. Start proxy
```bash
litellm --config /path/to/config.yaml
```
3. Test it!
```bash
curl http://0.0.0.0:4000/v1/chat/completions \
-H "Content-Type: application/json" \
-H "Authorization: Bearer $LITELLM_API_KEY" \
-d '{
"model": "vertex_ai_beta/gemini-1.5-pro",
"messages": [{"role": "user", "content": "List 5 cookie recipes"}]
"model": "gemini-1.5-flash",
"messages": [
{"role": "system", "content": "Extract the event information."},
{"role": "user", "content": "Alice and Bob are going to a science fair on Friday."},
],
"response_format": {
"type": "json_object",
"enforce_validation: true,
"response_schema": {
"type": "array",
"items": {
"type": "json_schema",
"json_schema": {
"name": "math_reasoning",
"schema": {
"type": "object",
"properties": {
"recipe_name": {
"type": "string",
},
"steps": {
"type": "array",
"items": {
"type": "object",
"properties": {
"explanation": { "type": "string" },
"output": { "type": "string" }
},
"required": ["explanation", "output"],
"additionalProperties": false
}
},
"final_answer": { "type": "string" }
},
"required": ["recipe_name"],
"required": ["steps", "final_answer"],
"additionalProperties": false
},
"strict": true
},
}
},


@@ -144,6 +144,7 @@ enable_preview_features: bool = False
return_response_headers: bool = (
False # get response headers from LLM Api providers - example x-remaining-requests,
)
enable_json_schema_validation: bool = False
##################
logging: bool = True
enable_caching_on_provider_specific_optional_params: bool = (


@@ -148,7 +148,12 @@ class VertexAIAnthropicConfig:
optional_params["temperature"] = value
if param == "top_p":
optional_params["top_p"] = value
if param == "response_format" and "response_schema" in value:
if param == "response_format" and isinstance(value, dict):
json_schema: Optional[dict] = None
if "response_schema" in value:
json_schema = value["response_schema"]
elif "json_schema" in value:
json_schema = value["json_schema"]["schema"]
"""
When using tools in this way: - https://docs.anthropic.com/en/docs/build-with-claude/tool-use#json-mode
- You usually want to provide a single tool
@@ -162,7 +167,7 @@ class VertexAIAnthropicConfig:
name="json_tool_call",
input_schema={
"type": "object",
"properties": {"values": value["response_schema"]}, # type: ignore
"properties": {"values": json_schema}, # type: ignore
},
)
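Taken together, the change above amounts to roughly this standalone sketch (not part of the diff): accept either the `response_schema` shape or the OpenAI-style `json_schema` shape, and wrap whichever schema is found into Anthropic's forced `json_tool_call` tool. A plain dict stands in for the `AnthropicMessagesTool` object used in the real code.

```python
from typing import Optional

def response_format_to_json_tool(value: dict) -> Optional[dict]:
    # Sketch of the mapping in the diff above; a plain dict stands in for
    # the AnthropicMessagesTool object used in the real code.
    json_schema: Optional[dict] = None
    if "response_schema" in value:
        json_schema = value["response_schema"]
    elif "json_schema" in value:
        json_schema = value["json_schema"]["schema"]
    if json_schema is None:
        return None
    return {
        "name": "json_tool_call",
        "input_schema": {
            "type": "object",
            "properties": {"values": json_schema},
        },
    }
```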


@@ -181,13 +181,17 @@ class GoogleAIStudioGeminiConfig: # key diff from VertexAI - 'frequency_penalty
optional_params["stop_sequences"] = value
if param == "max_tokens":
optional_params["max_output_tokens"] = value
if param == "response_format" and value["type"] == "json_object": # type: ignore
if param == "response_format": # type: ignore
if value["type"] == "json_object": # type: ignore
optional_params["response_mime_type"] = "application/json"
elif value["type"] == "text": # type: ignore
optional_params["response_mime_type"] = "text/plain"
if "response_schema" in value: # type: ignore
optional_params["response_schema"] = value["response_schema"] # type: ignore
if value["type"] == "json_object": # type: ignore
optional_params["response_mime_type"] = "application/json"
elif value["type"] == "text": # type: ignore
optional_params["response_mime_type"] = "text/plain"
if "response_schema" in value: # type: ignore
optional_params["response_schema"] = value["response_schema"] # type: ignore
elif value["type"] == "json_schema": # type: ignore
if "json_schema" in value and "schema" in value["json_schema"]: # type: ignore
optional_params["response_schema"] = value["json_schema"]["schema"] # type: ignore
if param == "tools" and isinstance(value, list):
gtool_func_declarations = []
for tool in value:
@@ -396,6 +400,9 @@ class VertexGeminiConfig:
optional_params["response_mime_type"] = "text/plain"
if "response_schema" in value:
optional_params["response_schema"] = value["response_schema"]
elif value["type"] == "json_schema": # type: ignore
if "json_schema" in value and "schema" in value["json_schema"]: # type: ignore
optional_params["response_schema"] = value["json_schema"]["schema"] # type: ignore
if param == "frequency_penalty":
optional_params["frequency_penalty"] = value
if param == "presence_penalty":


@@ -31,6 +31,7 @@ from typing import (
Literal,
Mapping,
Optional,
Type,
Union,
)
@@ -608,7 +609,7 @@ def completion(
logit_bias: Optional[dict] = None,
user: Optional[str] = None,
# openai v1.0+ new params
response_format: Optional[dict] = None,
response_format: Optional[Union[dict, Type[BaseModel]]] = None,
seed: Optional[int] = None,
tools: Optional[List] = None,
tool_choice: Optional[Union[str, dict]] = None,


@@ -11,4 +11,5 @@ model_list:
model: "gpt-4o"
litellm_settings:
enable_json_schema_validation: true
fallbacks: [{"gpt-3.5-turbo": ["gpt-4", "gpt-4o"]}]


@@ -1192,7 +1192,15 @@ def vertex_httpx_mock_post_valid_response(*args, **kwargs):
"role": "model",
"parts": [
{
"text": '[{"recipe_name": "Chocolate Chip Cookies"}, {"recipe_name": "Oatmeal Raisin Cookies"}, {"recipe_name": "Peanut Butter Cookies"}, {"recipe_name": "Sugar Cookies"}, {"recipe_name": "Snickerdoodles"}]\n'
"text": """{
"recipes": [
{"recipe_name": "Chocolate Chip Cookies"},
{"recipe_name": "Oatmeal Raisin Cookies"},
{"recipe_name": "Peanut Butter Cookies"},
{"recipe_name": "Sugar Cookies"},
{"recipe_name": "Snickerdoodles"}
]
}"""
}
],
},
@@ -1253,13 +1261,15 @@ def vertex_httpx_mock_post_valid_response_anthropic(*args, **kwargs):
"id": "toolu_vrtx_01YMnYZrToPPfcmY2myP2gEB",
"name": "json_tool_call",
"input": {
"values": [
{"recipe_name": "Chocolate Chip Cookies"},
{"recipe_name": "Oatmeal Raisin Cookies"},
{"recipe_name": "Peanut Butter Cookies"},
{"recipe_name": "Snickerdoodle Cookies"},
{"recipe_name": "Sugar Cookies"},
]
"values": {
"recipes": [
{"recipe_name": "Chocolate Chip Cookies"},
{"recipe_name": "Oatmeal Raisin Cookies"},
{"recipe_name": "Peanut Butter Cookies"},
{"recipe_name": "Snickerdoodle Cookies"},
{"recipe_name": "Sugar Cookies"},
]
}
},
}
],
@@ -1377,16 +1387,19 @@ async def test_gemini_pro_json_schema_args_sent_httpx(
from litellm.llms.custom_httpx.http_handler import HTTPHandler
response_schema = {
"type": "array",
"items": {
"type": "object",
"properties": {
"recipe_name": {
"type": "string",
"type": "object",
"properties": {
"recipes": {
"type": "array",
"items": {
"type": "object",
"properties": {"recipe_name": {"type": "string"}},
"required": ["recipe_name"],
},
},
"required": ["recipe_name"],
}
},
"required": ["recipes"],
"additionalProperties": False,
}
client = HTTPHandler()
@@ -1448,6 +1461,105 @@ async def test_gemini_pro_json_schema_args_sent_httpx(
)
@pytest.mark.parametrize(
"model, vertex_location, supports_response_schema",
[
("vertex_ai_beta/gemini-1.5-pro-001", "us-central1", True),
("gemini/gemini-1.5-pro", None, True),
("vertex_ai_beta/gemini-1.5-flash", "us-central1", False),
("vertex_ai/claude-3-5-sonnet@20240620", "us-east5", False),
],
)
@pytest.mark.parametrize(
"invalid_response",
[True, False],
)
@pytest.mark.parametrize(
"enforce_validation",
[True, False],
)
@pytest.mark.asyncio
async def test_gemini_pro_json_schema_args_sent_httpx_openai_schema(
model,
supports_response_schema,
vertex_location,
invalid_response,
enforce_validation,
):
from typing import List
from pydantic import BaseModel
load_vertex_ai_credentials()
os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
litellm.model_cost = litellm.get_model_cost_map(url="")
litellm.set_verbose = True
messages = [{"role": "user", "content": "List 5 cookie recipes"}]
from litellm.llms.custom_httpx.http_handler import HTTPHandler
class Recipe(BaseModel):
recipe_name: str
class ResponseSchema(BaseModel):
recipes: List[Recipe]
client = HTTPHandler()
httpx_response = MagicMock()
if invalid_response is True:
if "claude" in model:
httpx_response.side_effect = (
vertex_httpx_mock_post_invalid_schema_response_anthropic
)
else:
httpx_response.side_effect = vertex_httpx_mock_post_invalid_schema_response
else:
if "claude" in model:
httpx_response.side_effect = vertex_httpx_mock_post_valid_response_anthropic
else:
httpx_response.side_effect = vertex_httpx_mock_post_valid_response
with patch.object(client, "post", new=httpx_response) as mock_call:
print("SENDING CLIENT POST={}".format(client.post))
try:
resp = completion(
model=model,
messages=messages,
response_format=ResponseSchema,
vertex_location=vertex_location,
client=client,
)
print("Received={}".format(resp))
if invalid_response is True and enforce_validation is True:
pytest.fail("Expected this to fail")
except litellm.JSONSchemaValidationError as e:
if invalid_response is False:
pytest.fail("Expected this to pass. Got={}".format(e))
mock_call.assert_called_once()
if "claude" not in model:
print(mock_call.call_args.kwargs)
print(mock_call.call_args.kwargs["json"]["generationConfig"])
if supports_response_schema:
assert (
"response_schema"
in mock_call.call_args.kwargs["json"]["generationConfig"]
)
else:
assert (
"response_schema"
not in mock_call.call_args.kwargs["json"]["generationConfig"]
)
assert (
"Use this JSON schema:"
in mock_call.call_args.kwargs["json"]["contents"][0]["parts"][1][
"text"
]
)
@pytest.mark.parametrize("provider", ["vertex_ai_beta"]) # "vertex_ai",
@pytest.mark.asyncio
async def test_gemini_pro_httpx_custom_api_base(provider):


@@ -2130,6 +2130,43 @@ def test_completion_openai():
pytest.fail(f"Error occurred: {e}")
def test_completion_openai_pydantic():
try:
litellm.set_verbose = True
from pydantic import BaseModel
class CalendarEvent(BaseModel):
name: str
date: str
participants: list[str]
print(f"api key: {os.environ['OPENAI_API_KEY']}")
litellm.api_key = os.environ["OPENAI_API_KEY"]
response = completion(
model="gpt-4o-2024-08-06",
messages=[{"role": "user", "content": "Hey"}],
max_tokens=10,
metadata={"hi": "bye"},
response_format=CalendarEvent,
)
print("This is the response object\n", response)
response_str = response["choices"][0]["message"]["content"]
response_str_2 = response.choices[0].message.content
cost = completion_cost(completion_response=response)
print("Cost for completion call with gpt-3.5-turbo: ", f"${float(cost):.10f}")
assert response_str == response_str_2
assert type(response_str) == str
assert len(response_str) > 1
litellm.api_key = None
except Timeout as e:
pass
except Exception as e:
pytest.fail(f"Error occurred: {e}")
def test_completion_openai_organization():
try:
litellm.set_verbose = True
@@ -4062,7 +4099,7 @@ def test_completion_gemini(model):
if "InternalServerError" in str(e):
pass
else:
pytest.fail(f"Error occurred: {e}")
pytest.fail(f"Error occurred:{e}")
# test_completion_gemini()


@@ -45,6 +45,8 @@ import requests
import tiktoken
from httpx import Proxy
from httpx._utils import get_environment_proxies
from openai.lib import _parsing, _pydantic
from openai.types.chat.completion_create_params import ResponseFormat
from pydantic import BaseModel
from tokenizers import Tokenizer
@@ -158,6 +160,7 @@ from typing import (
Literal,
Optional,
Tuple,
Type,
Union,
cast,
get_args,
@@ -629,8 +632,8 @@ def client(original_function):
call_type == CallTypes.completion.value
or call_type == CallTypes.acompletion.value
):
is_coroutine = check_coroutine(original_function)
if is_coroutine == True:
is_coroutine = check_coroutine(original_response)
if is_coroutine is True:
pass
else:
if isinstance(original_response, ModelResponse):
@@ -643,6 +646,49 @@ def client(original_function):
input=model_response, model=model
)
### JSON SCHEMA VALIDATION ###
if litellm.enable_json_schema_validation is True:
try:
if (
optional_params is not None
and "response_format" in optional_params
and optional_params["response_format"]
is not None
):
json_response_format: Optional[dict] = None
if (
isinstance(
optional_params["response_format"],
dict,
)
and optional_params[
"response_format"
].get("json_schema")
is not None
):
json_response_format = optional_params[
"response_format"
]
elif (
_parsing._completions.is_basemodel_type(
optional_params["response_format"]
)
):
json_response_format = (
type_to_response_format_param(
response_format=optional_params[
"response_format"
]
)
)
if json_response_format is not None:
litellm.litellm_core_utils.json_validation_rule.validate_schema(
schema=json_response_format[
"json_schema"
]["schema"],
response=model_response,
)
except TypeError:
pass
if (
optional_params is not None
and "response_format" in optional_params
@@ -2806,6 +2852,11 @@ def get_optional_params(
message=f"Function calling is not supported by {custom_llm_provider}.",
)
if "response_format" in non_default_params:
non_default_params["response_format"] = type_to_response_format_param(
response_format=non_default_params["response_format"]
)
if "tools" in non_default_params and isinstance(
non_default_params, list
): # fixes https://github.com/BerriAI/litellm/issues/4933
@@ -6104,6 +6155,36 @@ def _should_retry(status_code: int):
return False
def type_to_response_format_param(
response_format: Optional[Union[Type[BaseModel], dict]],
) -> Optional[dict]:
"""
Re-implementation of openai's 'type_to_response_format_param' function
Used for converting pydantic object to api schema.
"""
if response_format is None:
return None
if isinstance(response_format, dict):
return response_format
# type checkers don't narrow the negation of a `TypeGuard` as it isn't
# a safe default behaviour but we know that at this point the `response_format`
# can only be a `type`
if not _parsing._completions.is_basemodel_type(response_format):
raise TypeError(f"Unsupported response_format type - {response_format}")
return {
"type": "json_schema",
"json_schema": {
"schema": _pydantic.to_strict_json_schema(response_format),
"name": response_format.__name__,
"strict": True,
},
}
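A quick usage sketch for the new helper (assuming pydantic v2; the resulting dict mirrors the shape returned above):

```python
from pydantic import BaseModel

class CalendarEvent(BaseModel):
    name: str
    date: str
    participants: list[str]

converted = type_to_response_format_param(response_format=CalendarEvent)
# converted == {
#     "type": "json_schema",
#     "json_schema": {
#         "schema": {...},        # strict JSON schema derived from CalendarEvent
#         "name": "CalendarEvent",
#         "strict": True,
#     },
# }

# dicts pass through unchanged, and None stays None
assert type_to_response_format_param(response_format=None) is None
```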
def _get_retry_after_from_exception_header(
response_headers: Optional[httpx.Headers] = None,
):