change schema -> dataset_schema for register_dataset api (#443)

# What does this PR do?

- API updates: rename `schema` to `dataset_schema` in the `register_dataset`
API to resolve a pydantic naming conflict
- Note: this OpenAPI update will be synced with the
llama-stack-client-python SDK.

cc @dineshyv 

## Test Plan

```
pytest -v -s -m meta_reference_eval_together_inference_huggingface_datasetio eval/test_eval.py
```

## Sources

Please link relevant resources if necessary.


## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor
guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md),
      Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
This commit is contained in:
Xi Yan 2024-11-13 11:17:46 -05:00 committed by GitHub
parent d5b1202c83
commit 94a6f57812
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 90 additions and 90 deletions

View file

@ -21,7 +21,7 @@
"info": {
"title": "[DRAFT] Llama Stack Specification",
"version": "0.0.1",
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-11-12 15:47:15.607543"
"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-11-13 11:02:50.081698"
},
"servers": [
{
@ -5170,7 +5170,7 @@
"const": "dataset",
"default": "dataset"
},
"schema": {
"dataset_schema": {
"type": "object",
"additionalProperties": {
"oneOf": [
@ -5352,7 +5352,7 @@
"provider_resource_id",
"provider_id",
"type",
"schema",
"dataset_schema",
"url",
"metadata"
]
@ -6678,7 +6678,7 @@
"dataset_id": {
"type": "string"
},
"schema": {
"dataset_schema": {
"type": "object",
"additionalProperties": {
"oneOf": [
@ -6863,7 +6863,7 @@
"additionalProperties": false,
"required": [
"dataset_id",
"schema",
"dataset_schema",
"url"
]
},
@ -7837,58 +7837,58 @@
],
"tags": [
{
"name": "Safety"
},
{
"name": "EvalTasks"
},
{
"name": "Shields"
},
{
"name": "Telemetry"
},
{
"name": "Memory"
},
{
"name": "Scoring"
},
{
"name": "ScoringFunctions"
},
{
"name": "SyntheticDataGeneration"
"name": "Inspect"
},
{
"name": "Models"
},
{
"name": "Agents"
"name": "Eval"
},
{
"name": "MemoryBanks"
"name": "EvalTasks"
},
{
"name": "DatasetIO"
"name": "Scoring"
},
{
"name": "Inference"
},
{
"name": "Datasets"
"name": "Memory"
},
{
"name": "Safety"
},
{
"name": "PostTraining"
},
{
"name": "ScoringFunctions"
},
{
"name": "Telemetry"
},
{
"name": "Shields"
},
{
"name": "BatchInference"
},
{
"name": "Eval"
"name": "MemoryBanks"
},
{
"name": "Inspect"
"name": "Datasets"
},
{
"name": "SyntheticDataGeneration"
},
{
"name": "DatasetIO"
},
{
"name": "Agents"
},
{
"name": "BuiltinTool",

View file

@ -723,23 +723,7 @@ components:
Dataset:
additionalProperties: false
properties:
identifier:
type: string
metadata:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
provider_id:
type: string
provider_resource_id:
type: string
schema:
dataset_schema:
additionalProperties:
oneOf:
- additionalProperties: false
@ -833,6 +817,22 @@ components:
- type
type: object
type: object
identifier:
type: string
metadata:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
provider_id:
type: string
provider_resource_id:
type: string
type:
const: dataset
default: dataset
@ -844,7 +844,7 @@ components:
- provider_resource_id
- provider_id
- type
- schema
- dataset_schema
- url
- metadata
type: object
@ -1910,21 +1910,7 @@ components:
properties:
dataset_id:
type: string
metadata:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
provider_dataset_id:
type: string
provider_id:
type: string
schema:
dataset_schema:
additionalProperties:
oneOf:
- additionalProperties: false
@ -2018,11 +2004,25 @@ components:
- type
type: object
type: object
metadata:
additionalProperties:
oneOf:
- type: 'null'
- type: boolean
- type: number
- type: string
- type: array
- type: object
type: object
provider_dataset_id:
type: string
provider_id:
type: string
url:
$ref: '#/components/schemas/URL'
required:
- dataset_id
- schema
- dataset_schema
- url
type: object
RegisterEvalTaskRequest:
@ -3384,7 +3384,7 @@ info:
description: "This is the specification of the llama stack that provides\n \
\ a set of endpoints and their corresponding interfaces that are tailored\
\ to\n best leverage Llama Models. The specification is still in\
\ draft and subject to change.\n Generated at 2024-11-12 15:47:15.607543"
\ draft and subject to change.\n Generated at 2024-11-13 11:02:50.081698"
title: '[DRAFT] Llama Stack Specification'
version: 0.0.1
jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@ -4748,24 +4748,24 @@ security:
servers:
- url: http://any-hosted-llama-stack.com
tags:
- name: Safety
- name: EvalTasks
- name: Shields
- name: Telemetry
- name: Memory
- name: Scoring
- name: ScoringFunctions
- name: SyntheticDataGeneration
- name: Models
- name: Agents
- name: MemoryBanks
- name: DatasetIO
- name: Inference
- name: Datasets
- name: PostTraining
- name: BatchInference
- name: Eval
- name: Inspect
- name: Models
- name: Eval
- name: EvalTasks
- name: Scoring
- name: Inference
- name: Memory
- name: Safety
- name: PostTraining
- name: ScoringFunctions
- name: Telemetry
- name: Shields
- name: BatchInference
- name: MemoryBanks
- name: Datasets
- name: SyntheticDataGeneration
- name: DatasetIO
- name: Agents
- description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
name: BuiltinTool
- description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"

View file

@ -49,7 +49,7 @@ class Datasets(Protocol):
async def register_dataset(
self,
dataset_id: str,
schema: Dict[str, ParamType],
dataset_schema: Dict[str, ParamType],
url: URL,
provider_dataset_id: Optional[str] = None,
provider_id: Optional[str] = None,

View file

@ -310,7 +310,7 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
async def register_dataset(
self,
dataset_id: str,
schema: Dict[str, ParamType],
dataset_schema: Dict[str, ParamType],
url: URL,
provider_dataset_id: Optional[str] = None,
provider_id: Optional[str] = None,
@ -332,7 +332,7 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
identifier=dataset_id,
provider_resource_id=provider_dataset_id,
provider_id=provider_id,
dataset_schema=schema,
dataset_schema=dataset_schema,
url=url,
metadata=metadata,
)

View file

@ -57,7 +57,7 @@ async def register_dataset(
await datasets_impl.register_dataset(
dataset_id=dataset_id,
schema=dataset_schema,
dataset_schema=dataset_schema,
url=URL(uri=test_url),
)

View file

@ -163,7 +163,7 @@ class Testeval:
await datasets_impl.register_dataset(
dataset_id="mmlu",
schema={
dataset_schema={
"input_query": StringType(),
"expected_answer": StringType(),
"chat_completion_input": ChatCompletionInputType(),