change schema -> dataset_schema for register_dataset api (#443)

# What does this PR do?

- API update: rename the `schema` parameter of `register_dataset` to
  `dataset_schema`, resolving a Pydantic naming conflict.
- Note: this OpenAPI update will be synced to the
  llama-stack-client-python SDK.

cc @dineshyv 
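
For background, here is a minimal sketch (not part of this PR) of the kind of conflict being avoided: Pydantic's `BaseModel` already exposes a `schema` attribute, so a resource model with a field of that name shadows it. The field types below are illustrative only; the real API uses `Dict[str, ParamType]`.

```python
from typing import Dict

from pydantic import BaseModel


class Dataset(BaseModel):
    identifier: str
    # A field literally named `schema` would shadow the built-in
    # `BaseModel.schema` attribute, which Pydantic rejects/warns about:
    #   schema: Dict[str, str]
    # Renaming the field sidesteps the collision:
    dataset_schema: Dict[str, str]


ds = Dataset(identifier="mmlu", dataset_schema={"input_query": "string"})
print(ds.dataset_schema)
```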

## Test Plan

```
pytest -v -s -m meta_reference_eval_together_inference_huggingface_datasetio eval/test_eval.py
```

## Sources

Please link relevant resources if necessary.


## Before submitting

- [ ] This PR fixes a typo or improves the docs (you can dismiss the
other checks if that's the case).
- [ ] Ran pre-commit to handle lint / formatting issues.
- [ ] Read the [contributor
guideline](https://github.com/meta-llama/llama-stack/blob/main/CONTRIBUTING.md),
      Pull Request section?
- [ ] Updated relevant documentation.
- [ ] Wrote necessary unit or integration tests.
Xi Yan authored 2024-11-13 11:17:46 -05:00, committed by GitHub
parent d5b1202c83
commit 94a6f57812
6 changed files with 90 additions and 90 deletions


```diff
@@ -21,7 +21,7 @@
 "info": {
 "title": "[DRAFT] Llama Stack Specification",
 "version": "0.0.1",
-"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-11-12 15:47:15.607543"
+"description": "This is the specification of the llama stack that provides\n a set of endpoints and their corresponding interfaces that are tailored to\n best leverage Llama Models. The specification is still in draft and subject to change.\n Generated at 2024-11-13 11:02:50.081698"
 },
 "servers": [
 {
@@ -5170,7 +5170,7 @@
 "const": "dataset",
 "default": "dataset"
 },
-"schema": {
+"dataset_schema": {
 "type": "object",
 "additionalProperties": {
 "oneOf": [
@@ -5352,7 +5352,7 @@
 "provider_resource_id",
 "provider_id",
 "type",
-"schema",
+"dataset_schema",
 "url",
 "metadata"
 ]
@@ -6678,7 +6678,7 @@
 "dataset_id": {
 "type": "string"
 },
-"schema": {
+"dataset_schema": {
 "type": "object",
 "additionalProperties": {
 "oneOf": [
@@ -6863,7 +6863,7 @@
 "additionalProperties": false,
 "required": [
 "dataset_id",
-"schema",
+"dataset_schema",
 "url"
 ]
 },
@@ -7837,58 +7837,58 @@
 ],
 "tags": [
 {
-"name": "Safety"
-},
-{
-"name": "EvalTasks"
-},
-{
-"name": "Shields"
-},
-{
-"name": "Telemetry"
-},
-{
-"name": "Memory"
-},
-{
-"name": "Scoring"
-},
-{
-"name": "ScoringFunctions"
-},
-{
-"name": "SyntheticDataGeneration"
+"name": "Inspect"
 },
 {
 "name": "Models"
 },
 {
-"name": "Agents"
+"name": "Eval"
 },
 {
-"name": "MemoryBanks"
+"name": "EvalTasks"
 },
 {
-"name": "DatasetIO"
+"name": "Scoring"
 },
 {
 "name": "Inference"
 },
 {
-"name": "Datasets"
+"name": "Memory"
+},
+{
+"name": "Safety"
 },
 {
 "name": "PostTraining"
 },
+{
+"name": "ScoringFunctions"
+},
+{
+"name": "Telemetry"
+},
+{
+"name": "Shields"
+},
 {
 "name": "BatchInference"
 },
 {
-"name": "Eval"
+"name": "MemoryBanks"
 },
 {
-"name": "Inspect"
+"name": "Datasets"
+},
+{
+"name": "SyntheticDataGeneration"
+},
+{
+"name": "DatasetIO"
+},
+{
+"name": "Agents"
 },
 {
 "name": "BuiltinTool",
```


```diff
@@ -723,23 +723,7 @@ components:
 Dataset:
 additionalProperties: false
 properties:
-identifier:
-type: string
-metadata:
-additionalProperties:
-oneOf:
-- type: 'null'
-- type: boolean
-- type: number
-- type: string
-- type: array
-- type: object
-type: object
-provider_id:
-type: string
-provider_resource_id:
-type: string
-schema:
+dataset_schema:
 additionalProperties:
 oneOf:
 - additionalProperties: false
@@ -833,6 +817,22 @@ components:
 - type
 type: object
 type: object
+identifier:
+type: string
+metadata:
+additionalProperties:
+oneOf:
+- type: 'null'
+- type: boolean
+- type: number
+- type: string
+- type: array
+- type: object
+type: object
+provider_id:
+type: string
+provider_resource_id:
+type: string
 type:
 const: dataset
 default: dataset
@@ -844,7 +844,7 @@ components:
 - provider_resource_id
 - provider_id
 - type
-- schema
+- dataset_schema
 - url
 - metadata
 type: object
@@ -1910,21 +1910,7 @@ components:
 properties:
 dataset_id:
 type: string
-metadata:
-additionalProperties:
-oneOf:
-- type: 'null'
-- type: boolean
-- type: number
-- type: string
-- type: array
-- type: object
-type: object
-provider_dataset_id:
-type: string
-provider_id:
-type: string
-schema:
+dataset_schema:
 additionalProperties:
 oneOf:
 - additionalProperties: false
@@ -2018,11 +2004,25 @@ components:
 - type
 type: object
 type: object
+metadata:
+additionalProperties:
+oneOf:
+- type: 'null'
+- type: boolean
+- type: number
+- type: string
+- type: array
+- type: object
+type: object
+provider_dataset_id:
+type: string
+provider_id:
+type: string
 url:
 $ref: '#/components/schemas/URL'
 required:
 - dataset_id
-- schema
+- dataset_schema
 - url
 type: object
 RegisterEvalTaskRequest:
@@ -3384,7 +3384,7 @@ info:
 description: "This is the specification of the llama stack that provides\n \
 \ a set of endpoints and their corresponding interfaces that are tailored\
 \ to\n best leverage Llama Models. The specification is still in\
-\ draft and subject to change.\n Generated at 2024-11-12 15:47:15.607543"
+\ draft and subject to change.\n Generated at 2024-11-13 11:02:50.081698"
 title: '[DRAFT] Llama Stack Specification'
 version: 0.0.1
 jsonSchemaDialect: https://json-schema.org/draft/2020-12/schema
@@ -4748,24 +4748,24 @@ security:
 servers:
 - url: http://any-hosted-llama-stack.com
 tags:
-- name: Safety
-- name: EvalTasks
-- name: Shields
-- name: Telemetry
-- name: Memory
-- name: Scoring
-- name: ScoringFunctions
-- name: SyntheticDataGeneration
-- name: Models
-- name: Agents
-- name: MemoryBanks
-- name: DatasetIO
-- name: Inference
-- name: Datasets
-- name: PostTraining
-- name: BatchInference
-- name: Eval
 - name: Inspect
+- name: Models
+- name: Eval
+- name: EvalTasks
+- name: Scoring
+- name: Inference
+- name: Memory
+- name: Safety
+- name: PostTraining
+- name: ScoringFunctions
+- name: Telemetry
+- name: Shields
+- name: BatchInference
+- name: MemoryBanks
+- name: Datasets
+- name: SyntheticDataGeneration
+- name: DatasetIO
+- name: Agents
 - description: <SchemaDefinition schemaRef="#/components/schemas/BuiltinTool" />
 name: BuiltinTool
 - description: <SchemaDefinition schemaRef="#/components/schemas/CompletionMessage"
```


```diff
@@ -49,7 +49,7 @@ class Datasets(Protocol):
     async def register_dataset(
         self,
         dataset_id: str,
-        schema: Dict[str, ParamType],
+        dataset_schema: Dict[str, ParamType],
         url: URL,
         provider_dataset_id: Optional[str] = None,
         provider_id: Optional[str] = None,
```


```diff
@@ -310,7 +310,7 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
     async def register_dataset(
         self,
         dataset_id: str,
-        schema: Dict[str, ParamType],
+        dataset_schema: Dict[str, ParamType],
         url: URL,
         provider_dataset_id: Optional[str] = None,
         provider_id: Optional[str] = None,
@@ -332,7 +332,7 @@ class DatasetsRoutingTable(CommonRoutingTableImpl, Datasets):
            identifier=dataset_id,
            provider_resource_id=provider_dataset_id,
            provider_id=provider_id,
-           dataset_schema=schema,
+           dataset_schema=dataset_schema,
            url=url,
            metadata=metadata,
        )
```


```diff
@@ -57,7 +57,7 @@ async def register_dataset(
     await datasets_impl.register_dataset(
         dataset_id=dataset_id,
-        schema=dataset_schema,
+        dataset_schema=dataset_schema,
         url=URL(uri=test_url),
     )
```


```diff
@@ -163,7 +163,7 @@ class Testeval:
         await datasets_impl.register_dataset(
             dataset_id="mmlu",
-            schema={
+            dataset_schema={
                 "input_query": StringType(),
                 "expected_answer": StringType(),
                 "chat_completion_input": ChatCompletionInputType(),
```