diff --git a/.circleci/config.yml b/.circleci/config.yml index f697be521a..a1348b12cc 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -47,7 +47,7 @@ jobs: pip install opentelemetry-api==1.25.0 pip install opentelemetry-sdk==1.25.0 pip install opentelemetry-exporter-otlp==1.25.0 - pip install openai==1.34.0 + pip install openai==1.40.0 pip install prisma==0.11.0 pip install "detect_secrets==1.5.0" pip install "httpx==0.24.1" @@ -165,7 +165,7 @@ jobs: pip install "pytest==7.3.1" pip install "pytest-asyncio==0.21.1" pip install aiohttp - pip install openai + pip install "openai==1.40.0" python -m pip install --upgrade pip python -m pip install -r .circleci/requirements.txt pip install "pytest==7.3.1" diff --git a/docs/my-website/docs/completion/json_mode.md b/docs/my-website/docs/completion/json_mode.md index 92e135dff5..bf159cd07e 100644 --- a/docs/my-website/docs/completion/json_mode.md +++ b/docs/my-website/docs/completion/json_mode.md @@ -69,13 +69,10 @@ To use Structured Outputs, simply specify response_format: { "type": "json_schema", "json_schema": … , "strict": true } ``` -Works for OpenAI models - -:::info - -Support for passing in a pydantic object to litellm sdk will be [coming soon](https://github.com/BerriAI/litellm/issues/5074#issuecomment-2272355842) - -::: +Works for: +- OpenAI models +- Google AI Studio - Gemini models +- Vertex AI models (Gemini + Anthropic) @@ -89,36 +86,15 @@ os.environ["OPENAI_API_KEY"] = "" messages = [{"role": "user", "content": "List 5 cookie recipes"}] +class CalendarEvent(BaseModel): + name: str + date: str + participants: list[str] + resp = completion( model="gpt-4o-2024-08-06", messages=messages, - response_format={ - "type": "json_schema", - "json_schema": { - "name": "math_reasoning", - "schema": { - "type": "object", - "properties": { - "steps": { - "type": "array", - "items": { - "type": "object", - "properties": { - "explanation": { "type": "string" }, - "output": { "type": "string" } - }, - "required": ["explanation", "output"], - "additionalProperties": False - } - }, - "final_answer": { "type": "string" } - }, - "required": ["steps", "final_answer"], - "additionalProperties": False - }, - "strict": True - }, - } + response_format=CalendarEvent ) print("Received={}".format(resp)) @@ -229,15 +205,15 @@ curl -X POST 'http://0.0.0.0:4000/v1/chat/completions' \ ## Validate JSON Schema -:::info -Support for doing this in the openai 'json_schema' format will be [coming soon](https://github.com/BerriAI/litellm/issues/5074#issuecomment-2272355842) +Not all vertex models support passing the json_schema to them (e.g. `gemini-1.5-flash`). To solve this, LiteLLM supports client-side validation of the json schema. -::: +``` +litellm.enable_json_schema_validation=True +``` +If `litellm.enable_json_schema_validation=True` is set, LiteLLM will validate the json response using `jsonvalidator`. -For VertexAI models, LiteLLM supports passing the `response_schema` and validating the JSON output. - -This works across Gemini (`vertex_ai_beta/`) + Anthropic (`vertex_ai/`) models. +[**See Code**](https://github.com/BerriAI/litellm/blob/671d8ac496b6229970c7f2a3bdedd6cb84f0746b/litellm/litellm_core_utils/json_validation_rule.py#L4) @@ -245,33 +221,28 @@ This works across Gemini (`vertex_ai_beta/`) + Anthropic (`vertex_ai/`) models. 
```python # !gcloud auth application-default login - run this to add vertex credentials to your env - +import litellm, os from litellm import completion +from pydantic import BaseModel -messages = [{"role": "user", "content": "List 5 cookie recipes"}] -response_schema = { - "type": "array", - "items": { - "type": "object", - "properties": { - "recipe_name": { - "type": "string", - }, - }, - "required": ["recipe_name"], - }, -} +messages=[ + {"role": "system", "content": "Extract the event information."}, + {"role": "user", "content": "Alice and Bob are going to a science fair on Friday."}, + ] + +litellm.enable_json_schema_validation = True +litellm.set_verbose = True # see the raw request made by litellm + +class CalendarEvent(BaseModel): + name: str + date: str + participants: list[str] resp = completion( - model="vertex_ai_beta/gemini-1.5-pro", + model="gemini/gemini-1.5-pro", messages=messages, - response_format={ - "type": "json_object", - "response_schema": response_schema, - "enforce_validation": True, # client-side json schema validation - }, - vertex_location="us-east5", + response_format=CalendarEvent, ) print("Received={}".format(resp)) @@ -279,26 +250,63 @@ print("Received={}".format(resp)) +1. Create config.yaml +```yaml +model_list: + - model_name: "gemini-1.5-flash" + litellm_params: + model: "gemini/gemini-1.5-flash" + api_key: os.environ/GEMINI_API_KEY + +litellm_settings: + enable_json_schema_validation: True +``` + +2. Start proxy + +```bash +litellm --config /path/to/config.yaml +``` + +3. Test it! + ```bash curl http://0.0.0.0:4000/v1/chat/completions \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $LITELLM_API_KEY" \ -d '{ - "model": "vertex_ai_beta/gemini-1.5-pro", - "messages": [{"role": "user", "content": "List 5 cookie recipes"}] + "model": "gemini-1.5-flash", + "messages": [ + {"role": "system", "content": "Extract the event information."}, + {"role": "user", "content": "Alice and Bob are going to a science fair on Friday."}, + ], "response_format": { "type": "json_object", - "enforce_validation: true, "response_schema": { - "type": "array", - "items": { + "type": "json_schema", + "json_schema": { + "name": "math_reasoning", + "schema": { "type": "object", "properties": { - "recipe_name": { - "type": "string", - }, + "steps": { + "type": "array", + "items": { + "type": "object", + "properties": { + "explanation": { "type": "string" }, + "output": { "type": "string" } + }, + "required": ["explanation", "output"], + "additionalProperties": false + } + }, + "final_answer": { "type": "string" } }, - "required": ["recipe_name"], + "required": ["steps", "final_answer"], + "additionalProperties": false + }, + "strict": true }, } }, diff --git a/litellm/__init__.py b/litellm/__init__.py index dfc3f3fc1b..9c8513e142 100644 --- a/litellm/__init__.py +++ b/litellm/__init__.py @@ -144,6 +144,7 @@ enable_preview_features: bool = False return_response_headers: bool = ( False # get response headers from LLM Api providers - example x-remaining-requests, ) +enable_json_schema_validation: bool = False ################## logging: bool = True enable_caching_on_provider_specific_optional_params: bool = ( diff --git a/litellm/llms/vertex_ai_anthropic.py b/litellm/llms/vertex_ai_anthropic.py index 900e7795f7..5887458527 100644 --- a/litellm/llms/vertex_ai_anthropic.py +++ b/litellm/llms/vertex_ai_anthropic.py @@ -148,7 +148,12 @@ class VertexAIAnthropicConfig: optional_params["temperature"] = value if param == "top_p": optional_params["top_p"] = value - if param == 
"response_format" and "response_schema" in value: + if param == "response_format" and isinstance(value, dict): + json_schema: Optional[dict] = None + if "response_schema" in value: + json_schema = value["response_schema"] + elif "json_schema" in value: + json_schema = value["json_schema"]["schema"] """ When using tools in this way: - https://docs.anthropic.com/en/docs/build-with-claude/tool-use#json-mode - You usually want to provide a single tool @@ -162,7 +167,7 @@ class VertexAIAnthropicConfig: name="json_tool_call", input_schema={ "type": "object", - "properties": {"values": value["response_schema"]}, # type: ignore + "properties": {"values": json_schema}, # type: ignore }, ) diff --git a/litellm/llms/vertex_httpx.py b/litellm/llms/vertex_httpx.py index db61b129b3..fa6308bef7 100644 --- a/litellm/llms/vertex_httpx.py +++ b/litellm/llms/vertex_httpx.py @@ -181,13 +181,17 @@ class GoogleAIStudioGeminiConfig: # key diff from VertexAI - 'frequency_penalty optional_params["stop_sequences"] = value if param == "max_tokens": optional_params["max_output_tokens"] = value - if param == "response_format" and value["type"] == "json_object": # type: ignore + if param == "response_format": # type: ignore if value["type"] == "json_object": # type: ignore - optional_params["response_mime_type"] = "application/json" - elif value["type"] == "text": # type: ignore - optional_params["response_mime_type"] = "text/plain" - if "response_schema" in value: # type: ignore - optional_params["response_schema"] = value["response_schema"] # type: ignore + if value["type"] == "json_object": # type: ignore + optional_params["response_mime_type"] = "application/json" + elif value["type"] == "text": # type: ignore + optional_params["response_mime_type"] = "text/plain" + if "response_schema" in value: # type: ignore + optional_params["response_schema"] = value["response_schema"] # type: ignore + elif value["type"] == "json_schema": # type: ignore + if "json_schema" in value and "schema" in value["json_schema"]: # type: ignore + optional_params["response_schema"] = value["json_schema"]["schema"] # type: ignore if param == "tools" and isinstance(value, list): gtool_func_declarations = [] for tool in value: @@ -396,6 +400,9 @@ class VertexGeminiConfig: optional_params["response_mime_type"] = "text/plain" if "response_schema" in value: optional_params["response_schema"] = value["response_schema"] + elif value["type"] == "json_schema": # type: ignore + if "json_schema" in value and "schema" in value["json_schema"]: # type: ignore + optional_params["response_schema"] = value["json_schema"]["schema"] # type: ignore if param == "frequency_penalty": optional_params["frequency_penalty"] = value if param == "presence_penalty": diff --git a/litellm/main.py b/litellm/main.py index dcb3642505..1840a900a6 100644 --- a/litellm/main.py +++ b/litellm/main.py @@ -31,6 +31,7 @@ from typing import ( Literal, Mapping, Optional, + Type, Union, ) @@ -608,7 +609,7 @@ def completion( logit_bias: Optional[dict] = None, user: Optional[str] = None, # openai v1.0+ new params - response_format: Optional[dict] = None, + response_format: Optional[Union[dict, Type[BaseModel]]] = None, seed: Optional[int] = None, tools: Optional[List] = None, tool_choice: Optional[Union[str, dict]] = None, diff --git a/litellm/proxy/_new_secret_config.yaml b/litellm/proxy/_new_secret_config.yaml index f00d5ec3e7..35ef59c965 100644 --- a/litellm/proxy/_new_secret_config.yaml +++ b/litellm/proxy/_new_secret_config.yaml @@ -11,4 +11,5 @@ model_list: model: "gpt-4o" 
litellm_settings: + enable_json_schema_validation: true fallbacks: [{"gpt-3.5-turbo": ["gpt-4", "gpt-4o"]}] diff --git a/litellm/tests/test_amazing_vertex_completion.py b/litellm/tests/test_amazing_vertex_completion.py index 4338d63ba6..53bb9fd803 100644 --- a/litellm/tests/test_amazing_vertex_completion.py +++ b/litellm/tests/test_amazing_vertex_completion.py @@ -1192,7 +1192,15 @@ def vertex_httpx_mock_post_valid_response(*args, **kwargs): "role": "model", "parts": [ { - "text": '[{"recipe_name": "Chocolate Chip Cookies"}, {"recipe_name": "Oatmeal Raisin Cookies"}, {"recipe_name": "Peanut Butter Cookies"}, {"recipe_name": "Sugar Cookies"}, {"recipe_name": "Snickerdoodles"}]\n' + "text": """{ + "recipes": [ + {"recipe_name": "Chocolate Chip Cookies"}, + {"recipe_name": "Oatmeal Raisin Cookies"}, + {"recipe_name": "Peanut Butter Cookies"}, + {"recipe_name": "Sugar Cookies"}, + {"recipe_name": "Snickerdoodles"} + ] + }""" } ], }, @@ -1253,13 +1261,15 @@ def vertex_httpx_mock_post_valid_response_anthropic(*args, **kwargs): "id": "toolu_vrtx_01YMnYZrToPPfcmY2myP2gEB", "name": "json_tool_call", "input": { - "values": [ - {"recipe_name": "Chocolate Chip Cookies"}, - {"recipe_name": "Oatmeal Raisin Cookies"}, - {"recipe_name": "Peanut Butter Cookies"}, - {"recipe_name": "Snickerdoodle Cookies"}, - {"recipe_name": "Sugar Cookies"}, - ] + "values": { + "recipes": [ + {"recipe_name": "Chocolate Chip Cookies"}, + {"recipe_name": "Oatmeal Raisin Cookies"}, + {"recipe_name": "Peanut Butter Cookies"}, + {"recipe_name": "Snickerdoodle Cookies"}, + {"recipe_name": "Sugar Cookies"}, + ] + } }, } ], @@ -1377,16 +1387,19 @@ async def test_gemini_pro_json_schema_args_sent_httpx( from litellm.llms.custom_httpx.http_handler import HTTPHandler response_schema = { - "type": "array", - "items": { - "type": "object", - "properties": { - "recipe_name": { - "type": "string", + "type": "object", + "properties": { + "recipes": { + "type": "array", + "items": { + "type": "object", + "properties": {"recipe_name": {"type": "string"}}, + "required": ["recipe_name"], }, - }, - "required": ["recipe_name"], + } }, + "required": ["recipes"], + "additionalProperties": False, } client = HTTPHandler() @@ -1448,6 +1461,105 @@ async def test_gemini_pro_json_schema_args_sent_httpx( ) +@pytest.mark.parametrize( + "model, vertex_location, supports_response_schema", + [ + ("vertex_ai_beta/gemini-1.5-pro-001", "us-central1", True), + ("gemini/gemini-1.5-pro", None, True), + ("vertex_ai_beta/gemini-1.5-flash", "us-central1", False), + ("vertex_ai/claude-3-5-sonnet@20240620", "us-east5", False), + ], +) +@pytest.mark.parametrize( + "invalid_response", + [True, False], +) +@pytest.mark.parametrize( + "enforce_validation", + [True, False], +) +@pytest.mark.asyncio +async def test_gemini_pro_json_schema_args_sent_httpx_openai_schema( + model, + supports_response_schema, + vertex_location, + invalid_response, + enforce_validation, +): + from typing import List + + from pydantic import BaseModel + + load_vertex_ai_credentials() + os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True" + litellm.model_cost = litellm.get_model_cost_map(url="") + + litellm.set_verbose = True + + messages = [{"role": "user", "content": "List 5 cookie recipes"}] + from litellm.llms.custom_httpx.http_handler import HTTPHandler + + class Recipe(BaseModel): + recipe_name: str + + class ResponseSchema(BaseModel): + recipes: List[Recipe] + + client = HTTPHandler() + + httpx_response = MagicMock() + if invalid_response is True: + if "claude" in model: + 
httpx_response.side_effect = ( + vertex_httpx_mock_post_invalid_schema_response_anthropic + ) + else: + httpx_response.side_effect = vertex_httpx_mock_post_invalid_schema_response + else: + if "claude" in model: + httpx_response.side_effect = vertex_httpx_mock_post_valid_response_anthropic + else: + httpx_response.side_effect = vertex_httpx_mock_post_valid_response + with patch.object(client, "post", new=httpx_response) as mock_call: + print("SENDING CLIENT POST={}".format(client.post)) + try: + resp = completion( + model=model, + messages=messages, + response_format=ResponseSchema, + vertex_location=vertex_location, + client=client, + ) + print("Received={}".format(resp)) + if invalid_response is True and enforce_validation is True: + pytest.fail("Expected this to fail") + except litellm.JSONSchemaValidationError as e: + if invalid_response is False: + pytest.fail("Expected this to pass. Got={}".format(e)) + + mock_call.assert_called_once() + if "claude" not in model: + print(mock_call.call_args.kwargs) + print(mock_call.call_args.kwargs["json"]["generationConfig"]) + + if supports_response_schema: + assert ( + "response_schema" + in mock_call.call_args.kwargs["json"]["generationConfig"] + ) + else: + assert ( + "response_schema" + not in mock_call.call_args.kwargs["json"]["generationConfig"] + ) + assert ( + "Use this JSON schema:" + in mock_call.call_args.kwargs["json"]["contents"][0]["parts"][1][ + "text" + ] + ) + + @pytest.mark.parametrize("provider", ["vertex_ai_beta"]) # "vertex_ai", @pytest.mark.asyncio async def test_gemini_pro_httpx_custom_api_base(provider): diff --git a/litellm/tests/test_completion.py b/litellm/tests/test_completion.py index fe644b08c3..3614c4e857 100644 --- a/litellm/tests/test_completion.py +++ b/litellm/tests/test_completion.py @@ -2130,6 +2130,43 @@ def test_completion_openai(): pytest.fail(f"Error occurred: {e}") +def test_completion_openai_pydantic(): + try: + litellm.set_verbose = True + from pydantic import BaseModel + + class CalendarEvent(BaseModel): + name: str + date: str + participants: list[str] + + print(f"api key: {os.environ['OPENAI_API_KEY']}") + litellm.api_key = os.environ["OPENAI_API_KEY"] + response = completion( + model="gpt-4o-2024-08-06", + messages=[{"role": "user", "content": "Hey"}], + max_tokens=10, + metadata={"hi": "bye"}, + response_format=CalendarEvent, + ) + print("This is the response object\n", response) + + response_str = response["choices"][0]["message"]["content"] + response_str_2 = response.choices[0].message.content + + cost = completion_cost(completion_response=response) + print("Cost for completion call with gpt-3.5-turbo: ", f"${float(cost):.10f}") + assert response_str == response_str_2 + assert type(response_str) == str + assert len(response_str) > 1 + + litellm.api_key = None + except Timeout as e: + pass + except Exception as e: + pytest.fail(f"Error occurred: {e}") + + def test_completion_openai_organization(): try: litellm.set_verbose = True @@ -4062,7 +4099,7 @@ def test_completion_gemini(model): if "InternalServerError" in str(e): pass else: - pytest.fail(f"Error occurred: {e}") + pytest.fail(f"Error occurred:{e}") # test_completion_gemini() diff --git a/litellm/utils.py b/litellm/utils.py index e1a686eaf7..98c8b01841 100644 --- a/litellm/utils.py +++ b/litellm/utils.py @@ -45,6 +45,8 @@ import requests import tiktoken from httpx import Proxy from httpx._utils import get_environment_proxies +from openai.lib import _parsing, _pydantic +from openai.types.chat.completion_create_params import ResponseFormat from 
pydantic import BaseModel from tokenizers import Tokenizer @@ -158,6 +160,7 @@ from typing import ( Literal, Optional, Tuple, + Type, Union, cast, get_args, @@ -629,8 +632,8 @@ def client(original_function): call_type == CallTypes.completion.value or call_type == CallTypes.acompletion.value ): - is_coroutine = check_coroutine(original_function) - if is_coroutine == True: + is_coroutine = check_coroutine(original_response) + if is_coroutine is True: pass else: if isinstance(original_response, ModelResponse): @@ -643,6 +646,49 @@ def client(original_function): input=model_response, model=model ) ### JSON SCHEMA VALIDATION ### + if litellm.enable_json_schema_validation is True: + try: + if ( + optional_params is not None + and "response_format" in optional_params + and optional_params["response_format"] + is not None + ): + json_response_format: Optional[dict] = None + if ( + isinstance( + optional_params["response_format"], + dict, + ) + and optional_params[ + "response_format" + ].get("json_schema") + is not None + ): + json_response_format = optional_params[ + "response_format" + ] + elif ( + _parsing._completions.is_basemodel_type( + optional_params["response_format"] + ) + ): + json_response_format = ( + type_to_response_format_param( + response_format=optional_params[ + "response_format" + ] + ) + ) + if json_response_format is not None: + litellm.litellm_core_utils.json_validation_rule.validate_schema( + schema=json_response_format[ + "json_schema" + ]["schema"], + response=model_response, + ) + except TypeError: + pass if ( optional_params is not None and "response_format" in optional_params @@ -2806,6 +2852,11 @@ def get_optional_params( message=f"Function calling is not supported by {custom_llm_provider}.", ) + if "response_format" in non_default_params: + non_default_params["response_format"] = type_to_response_format_param( + response_format=non_default_params["response_format"] + ) + if "tools" in non_default_params and isinstance( non_default_params, list ): # fixes https://github.com/BerriAI/litellm/issues/4933 @@ -6104,6 +6155,36 @@ def _should_retry(status_code: int): return False +def type_to_response_format_param( + response_format: Optional[Union[Type[BaseModel], dict]], +) -> Optional[dict]: + """ + Re-implementation of openai's 'type_to_response_format_param' function + + Used for converting pydantic object to api schema. + """ + if response_format is None: + return None + + if isinstance(response_format, dict): + return response_format + + # type checkers don't narrow the negation of a `TypeGuard` as it isn't + # a safe default behaviour but we know that at this point the `response_format` + # can only be a `type` + if not _parsing._completions.is_basemodel_type(response_format): + raise TypeError(f"Unsupported response_format type - {response_format}") + + return { + "type": "json_schema", + "json_schema": { + "schema": _pydantic.to_strict_json_schema(response_format), + "name": response_format.__name__, + "strict": True, + }, + } + + def _get_retry_after_from_exception_header( response_headers: Optional[httpx.Headers] = None, ):
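
---

For reviewers who want to exercise the new path end to end, below is a minimal sketch assembled from the documentation examples added in this diff. It is not part of the patch: it assumes `GEMINI_API_KEY` is exported and that `openai>=1.40.0` is installed, since the Pydantic-to-`json_schema` conversion added in `litellm/utils.py` relies on `openai.lib._parsing` / `openai.lib._pydantic`.

```python
# Minimal sketch based on the docs examples added in this PR (not part of the patch).
# Assumes GEMINI_API_KEY is set and openai>=1.40.0 is installed.
import litellm
from litellm import completion
from pydantic import BaseModel


class CalendarEvent(BaseModel):
    name: str
    date: str
    participants: list[str]


# Client-side schema validation for models that don't accept a native response_schema.
litellm.enable_json_schema_validation = True
litellm.set_verbose = True  # log the raw request so the translated schema is visible

messages = [
    {"role": "system", "content": "Extract the event information."},
    {"role": "user", "content": "Alice and Bob are going to a science fair on Friday."},
]

try:
    resp = completion(
        model="gemini/gemini-1.5-pro",
        messages=messages,
        # A Pydantic class is converted to the OpenAI json_schema format
        # via the new type_to_response_format_param() helper.
        response_format=CalendarEvent,
    )
    print(resp.choices[0].message.content)
except litellm.JSONSchemaValidationError as e:
    # Raised when enable_json_schema_validation is on and the model's JSON
    # output does not match the supplied schema.
    print("schema validation failed: {}".format(e))
```

Swapping `model` for `gpt-4o-2024-08-06` (with `OPENAI_API_KEY` set) should exercise the same Pydantic `response_format` path covered by `test_completion_openai_pydantic`.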