feat(api): (1/n) datasets api clean up (#1573)

## PR Stack

- https://github.com/meta-llama/llama-stack/pull/1573
- https://github.com/meta-llama/llama-stack/pull/1625
- https://github.com/meta-llama/llama-stack/pull/1656
- https://github.com/meta-llama/llama-stack/pull/1657
- https://github.com/meta-llama/llama-stack/pull/1658
- https://github.com/meta-llama/llama-stack/pull/1659
- https://github.com/meta-llama/llama-stack/pull/1660

**Client SDK**

- https://github.com/meta-llama/llama-stack-client-python/pull/203

**CI**

- 1391130488

<img width="1042" alt="image" src="https://github.com/user-attachments/assets/69636067-376d-436b-9204-896e2dd490ca" />

-- the test_rag_agent_with_attachments is flaky and not related to this PR

## Doc

<img width="789" alt="image" src="https://github.com/user-attachments/assets/b88390f3-73d6-4483-b09a-a192064e32d9" />

## Client Usage

```python
client.datasets.register(
    source={
        "type": "uri",
        "uri": "lsfs://mydata.jsonl",
    },
    schema="jsonl_messages",
    # optional
    dataset_id="my_first_train_data"
)

# quick prototype debugging
client.datasets.register(
    data_reference={
        "type": "rows",
        "rows": [
            {"messages": [...]},
        ],
    },
    schema="jsonl_messages",
)
```

## Test Plan

- CI: 1387805545

```
LLAMA_STACK_CONFIG=fireworks pytest -v tests/integration/datasets/test_datasets.py
```

```
LLAMA_STACK_CONFIG=fireworks pytest -v tests/integration/scoring/test_scoring.py
```

```
pytest -v -s --nbval-lax ./docs/notebooks/Llama_Stack_Benchmark_Evals.ipynb
```
2025-03-17 16:55:45 -07:00 · 2025-03-17 16:55:45 -07:00 · 5287b437ae
commit 5287b437ae
parent 3b35a39b8b
29 changed files with 2593 additions and 2296 deletions
--- a/docs/openapi_generator/pyopenapi/generator.py
+++ b/docs/openapi_generator/pyopenapi/generator.py
@@ -435,7 +435,7 @@ class Generator:
        )
        self.schema_builder = SchemaBuilder(schema_generator)
        self.responses = {}
-        
+
        # Create standard error responses
        self._create_standard_error_responses()

@@ -446,7 +446,7 @@ class Generator:
        """
        # Get the Error schema
        error_schema = self.schema_builder.classdef_to_ref(Error)
-        
+
        # Create standard error responses
        self.responses["BadRequest400"] = Response(
            description="The request was invalid or malformed",
@@ -457,11 +457,11 @@ class Generator:
                        "status": 400,
                        "title": "Bad Request",
                        "detail": "The request was invalid or malformed",
-                    }
+                    },
                )
-            }
+            },
        )
-        
+
        self.responses["TooManyRequests429"] = Response(
            description="The client has sent too many requests in a given amount of time",
            content={
@@ -471,11 +471,11 @@ class Generator:
                        "status": 429,
                        "title": "Too Many Requests",
                        "detail": "You have exceeded the rate limit. Please try again later.",
-                    }
+                    },
                )
-            }
+            },
        )
-        
+
        self.responses["InternalServerError500"] = Response(
            description="The server encountered an unexpected error",
            content={
@@ -485,11 +485,11 @@ class Generator:
                        "status": 500,
                        "title": "Internal Server Error",
                        "detail": "An unexpected error occurred. Our team has been notified.",
-                    }
+                    },
                )
-            }
+            },
        )
-        
+
        # Add a default error response for any unhandled error cases
        self.responses["DefaultError"] = Response(
            description="An unexpected error occurred",
@@ -500,9 +500,9 @@ class Generator:
                        "status": 0,
                        "title": "Error",
                        "detail": "An unexpected error occurred",
-                    }
+                    },
                )
-            }
+            },
        )

    def _build_type_tag(self, ref: str, schema: Schema) -> Tag:
@@ -547,11 +547,14 @@ class Generator:
            "SyntheticDataGeneration",
            "PostTraining",
            "BatchInference",
-            "Files",
        ]:
            op.defining_class.__name__ = f"{op.defining_class.__name__} (Coming Soon)"
            print(op.defining_class.__name__)

+        # TODO (xiyan): temporary fix for datasetio inner impl + datasets api
+        # if op.defining_class.__name__ in ["DatasetIO"]:
+        #     op.defining_class.__name__ = "Datasets"
+
        doc_string = parse_type(op.func_ref)
        doc_params = dict(
            (param.name, param.description) for param in doc_string.params.values()
@@ -598,7 +601,9 @@ class Generator:

        # data passed in request body as raw bytes cannot have request parameters
        if raw_bytes_request_body and op.request_params:
-            raise ValueError("Cannot have both raw bytes request body and request parameters")
+            raise ValueError(
+                "Cannot have both raw bytes request body and request parameters"
+            )

        # data passed in request body as raw bytes
        if raw_bytes_request_body:
@@ -719,7 +724,7 @@ class Generator:
            responses.update(response_builder.build_response(response_options))

        assert len(responses.keys()) > 0, f"No responses found for {op.name}"
-        
+
        # Add standard error response references
        if self.options.include_standard_error_responses:
            if "400" not in responses:
@@ -730,7 +735,7 @@ class Generator:
                responses["500"] = ResponseRef("InternalServerError500")
            if "default" not in responses:
                responses["default"] = ResponseRef("DefaultError")
-        
+
        if op.event_type is not None:
            builder = ContentBuilder(self.schema_builder)
            callbacks = {