Dataset Management APIs
List of APIs to manage datasets
Dataset APIs
Dataset APIs allow you to manage datasets: creating new datasets, updating and listing existing datasets, and retrieving the metadata of a specific dataset.
Create Dataset
This API allows you to create new datasets used by the analytical data source.
OK
// Example: create a dataset by POSTing a JSON request envelope to
// /v2/datasets/create. `response` is the raw fetch Response and `data` is its
// body parsed as JSON.
const createDatasetPayload = {
  id: 'api.datasets.create',
  ver: 'v2',
  ts: '2024-04-10T16:10:50+05:30',
  params: {
    msgid: '4a7f14c3-d61e-4d4f-be78-181834eeff6d',
  },
  request: {
    dataset_id: 'telemetry_record-t4',
    type: 'event',
    name: 'sb-telemetry',
    validation_config: {
      validate: true,
      mode: 'Strict',
    },
    extraction_config: {
      is_batch_event: true,
      extraction_key: 'events',
      // Dedup settings nested under extraction_config use a different key
      // ("id") than the top-level dedup_config below ("mid").
      dedup_config: {
        drop_duplicates: true,
        dedup_key: 'id',
      },
    },
    dedup_config: {
      drop_duplicates: true,
      dedup_key: 'mid',
    },
    data_schema: {
      $schema: 'https://json-schema.org/draft/2020-12/schema',
      type: 'object',
      properties: {
        mid: {
          type: 'string',
          arrival_format: 'text',
          data_type: 'string',
        },
        ets: {
          type: 'integer',
          arrival_format: 'number',
          data_type: 'epoch',
        },
        eid: {
          type: 'string',
          arrival_format: 'text',
          data_type: 'string',
        },
      },
      additionalProperties: true,
    },
    denorm_config: {
      denorm_fields: [
        {
          denorm_key: 'eid',
          denorm_out_field: 'userdata',
          dataset_id: 'master-telemetry',
        },
      ],
    },
    transformations_config: [
      {
        field_key: 'email',
        transformation_function: {
          type: 'mask',
          expr: 'mid',
          datatype: 'string',
          category: 'pii',
        },
        mode: 'Strict',
      },
    ],
    tags: ['tag1'],
  },
};

const response = await fetch('http://localhost:3000/v2/datasets/create', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(createDatasetPayload),
});
const data = await response.json();
Update Dataset
This API allows you to update existing datasets, including adding or removing the denorm fields used by the analytical data source. You can also add, remove, or update transformations and connectors.
OK
// Example: update an existing dataset by sending a PATCH with a JSON request
// envelope to /v2/datasets/update. `response` is the raw fetch Response and
// `data` is its body parsed as JSON.
const updateDatasetPayload = {
  id: 'api.datasets.update',
  ver: 'v2',
  ts: '2024-04-10T16:10:50+05:30',
  params: {
    msgid: '4a7f14c3-d61e-4d4f-be78-181834eeff6d',
  },
  request: {
    dataset_id: 'telemetry_record-t4',
    version_key: '1721135455988',
    name: 'sb-telemetry',
    validation_config: {
      validate: true,
      mode: 'Strict',
    },
    extraction_config: {
      is_batch_event: true,
      extraction_key: 'events',
      dedup_config: {
        drop_duplicates: true,
        dedup_key: 'ipid',
      },
    },
    dedup_config: {
      drop_duplicates: true,
      dedup_key: 'mid',
    },
    data_schema: {
      $schema: 'https://json-schema.org/draft/2020-12/schema',
      type: 'object',
      properties: {
        midpid: {
          type: 'string',
          arrival_format: 'text',
          data_type: 'string',
        },
        miduwi: {
          type: 'integer',
          arrival_format: 'number',
          data_type: 'epoch',
        },
        mid: {
          type: 'string',
          arrival_format: 'text',
          data_type: 'string',
        },
        sid: {
          type: 'string',
          arrival_format: 'text',
          data_type: 'string',
        },
      },
      additionalProperties: true,
    },
    // In this sample, list-valued sections wrap each entry as
    // { value, action }, with action set to "upsert" or "remove".
    denorm_config: {
      denorm_fields: [
        {
          value: {
            denorm_key: 'eid',
            denorm_out_field: 'userdata',
          },
          action: 'remove',
        },
        {
          value: {
            denorm_key: 'eid',
            denorm_out_field: 'edata',
            dataset_id: 'trip-details',
          },
          action: 'upsert',
        },
      ],
    },
    transformations_config: [
      {
        value: {
          field_key: 'email',
          transformation_function: {
            type: 'mask',
            expr: 'mid',
            datatype: 'string',
            category: 'pii',
          },
          mode: 'Strict',
        },
        action: 'upsert',
      },
      {
        value: {
          field_key: 'email_id',
          transformation_function: {
            type: 'mask',
            expr: 'mid',
            datatype: 'string',
            category: 'pii',
          },
          mode: 'Strict',
        },
        action: 'remove',
      },
    ],
    tags: [],
    connectors_config: [
      {
        value: {
          connector_id: 'jdbc',
          connector_config: {
            source_database_type: 'postgresql',
            source_database_host: 'postgresql-hl.postgresql.svc.cluster.local.master',
            source_database_port: 5432,
            source_database_name: 'obsrv_sample_datasets_1',
            source_database_username: 'postgres',
            source_database_pwd: 'postgres',
            table: 'new_york_taxi_data',
            'timestamp-column': 'tpep_pickup_datetime',
            'batch-size': 100,
            'max-batches': 2,
          },
          operations_config: {
            polling_interval: 'periodic',
            schedule: 'twice',
          },
        },
        action: 'upsert',
      },
    ],
  },
};

const response = await fetch('http://localhost:3000/v2/datasets/update', {
  method: 'PATCH',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(updateDatasetPayload),
});
const data = await response.json();
Read Dataset
This API allows you to read the dataset identified by the requested dataset_id. You can request specific fields and a specific dataset status through the request parameters. By default, the API returns the dataset with status "Live". The API also accepts the parameter mode=edit to read the draft dataset; if no draft dataset is found, it creates one from the live dataset and returns the dataset details.
Unique identifier for the dataset
OK
// Example: read a single dataset with a GET to /v2/datasets/read/<dataset_id>.
// `response` is the raw fetch Response and `data` is its body parsed as JSON.
//
// The dataset id is interpolated into the path instead of leaving the literal
// text "{dataset_id}" in the URL, which would be sent to the server as-is.
// encodeURIComponent keeps ids containing reserved URL characters intact.
const datasetId = 'telemetry_record-t4'; // replace with the dataset to read
const response = await fetch(
  `http://localhost:3000/v2/datasets/read/${encodeURIComponent(datasetId)}`,
  {
    method: 'GET',
    headers: {},
  },
);
const data = await response.json();
List Datasets
This API allows you to list all datasets. You can filter the results by dataset status and type.
OK
// Example: list datasets by POSTing a JSON request envelope to
// /v2/datasets/list. This sample filters on status "Live". `response` is the
// raw fetch Response and `data` is its body parsed as JSON.
const listDatasetsPayload = {
  id: 'api.datasets.list',
  ver: 'v2',
  ts: '2024-04-10T16:10:50+05:30',
  params: {
    msgid: '4a7f14c3-d61e-4d4f-be78-181834eeff6d',
  },
  request: {
    filters: {
      status: ['Live'],
    },
  },
};

const response = await fetch('http://localhost:3000/v2/datasets/list', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(listDatasetsPayload),
});
const data = await response.json();
Generate Presigned URLs
This API generates presigned URLs for uploading files to, or downloading files from, cloud storage.
OK
// Example: request presigned URLs by POSTing a JSON request envelope to
// /v2/files/generate-url. This sample asks for "write" access for two file
// names. `response` is the raw fetch Response and `data` is its body parsed
// as JSON.
const generateUrlPayload = {
  id: 'api.files.generate-url',
  ver: 'v2',
  ts: '2024-04-19T12:58:47+05:30',
  params: {
    // Full 36-character UUID; the sample previously dropped the final
    // character, leaving a malformed message id.
    msgid: '4a7f14c3-d61e-4d4f-be78-181834eeff6d',
  },
  request: {
    files: ['telemetry.json', 'school_data.json'],
    access: 'write',
  },
};

const response = await fetch('http://localhost:3000/v2/files/generate-url', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(generateUrlPayload),
});
const data = await response.json();
Dataset Status Transition
This API allows you to transition a dataset from one status to another. The allowed status transitions are Draft to ReadyToPublish, ReadyToPublish to Live, and Live to Retired; the API can also be used to delete a dataset.
OK
// Example: change a dataset's status by POSTing a JSON request envelope to
// /v2/datasets/status-transition. This sample requests "ReadyToPublish" for
// dataset "telemetry-events". `response` is the raw fetch Response and `data`
// is its body parsed as JSON.
const statusTransitionPayload = {
  id: 'api.datasets.status-transition',
  ver: 'v2',
  ts: '2024-04-19T12:58:47+05:30',
  params: {
    // Full 36-character UUID; the sample previously dropped the final
    // character, leaving a malformed message id.
    msgid: '4a7f14c3-d61e-4d4f-be78-181834eeff6d',
  },
  request: {
    dataset_id: 'telemetry-events',
    status: 'ReadyToPublish',
  },
};

const response = await fetch('http://localhost:3000/v2/datasets/status-transition', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(statusTransitionPayload),
});
const data = await response.json();
Schema Generation
This API generates a data schema from the given sample events of a dataset.
OK
// Example: POST a JSON request envelope to /v2/datasets/dataschema.
// The request carries an array of sample events under "data" and a "config"
// object naming the dataset. `response` is the raw fetch Response; `data`
// (declared at the end) is its body parsed as JSON.
const response = await fetch('http://localhost:3000/v2/datasets/dataschema', {
method: 'POST',
headers: {
"Content-Type": "application/json"
},
body: JSON.stringify({
"id": "api.datasets.dataschema",
"ver": "v2",
"ts": "2024-04-10T16:10:50+05:30",
"params": {
"msgid": "4a7f14c3-d61e-4d4f-be78-181834eeff6d"
},
"request": {
// Sample events sent with the request (nine telemetry-style records).
"data": [
{
"eid": "IMPRESSION",
"ets": 1672657002221,
"ver": "3.0",
"mid": "IMPRESSION:2b5834e196f485c17c4e49d292af43c0",
"actor": {
"id": "0c45959486f579c24854d40a225d6161",
"type": "User"
},
"context": {
"channel": "01268904781886259221",
"pdata": {
"id": "staging.diksha.portal",
"ver": "5.1.0",
"pid": "sunbird-portal"
},
"env": "public",
"sid": "23850c90-8a8c-11ed-95d0-276800e1048c",
"did": "0c45959486f579c24854d40a225d6161",
"cdata": [],
"rollup": {
"l1": "01268904781886259221"
},
"uid": "anonymous"
},
"object": {},
"tags": [
"01268904781886259221"
],
"edata": {
"type": "view",
"pageid": "login",
"subtype": "pageexit",
"uri": "https://staging.sunbirded.org/auth/realms/sunbird/protocol/openid-connect/auth?client_id=portal&state=254efd70-6b89-4f7d-868b-5c957f54174e&redirect_uri=https%253A%252F%252Fstaging.sunbirded.org%252Fresources%253Fboard%253DState%252520(Andhra%252520Pradesh)%2526medium%253DEnglish%2526gradeLevel%253DClass%2525201%2526%2526id%253Dap_k-12_1%2526selectedTab%253Dhome%2526auth_callback%253D1&scope=openid&response_type=code&version=4",
"visits": []
},
"syncts": 1672657005814,
"@timestamp": "2023-01-02T10:56:45.814Z",
"flags": {
"ex_processed": true
}
},
{
"eid": "IMPRESSION",
"ets": 1672656997928,
"ver": "3.0",
"mid": "50263f0f-c2d5-4b15-95f4-5384c537f6cc",
"actor": {
"id": "internal",
"type": "Consumer"
},
"context": {
"channel": "0126796199493140480",
"pdata": {
"id": "staging.sunbird.learning.service",
"pid": "learner-service",
"ver": "5.0.0"
},
"env": "Organisation",
"cdata": [
{
"id": "50263f0f-c2d5-4b15-95f4-5384c537f6cc",
"type": "Request"
}
],
"rollup": {}
},
"edata": {
"level": "info",
"type": "Api_access",
"message": "",
"params": [
{
"method": "POST"
},
{
"url": "/v1/org/search"
},
{
"duration": 0
},
{
"status": "OK"
}
]
}
},
{
"eid": "LOG",
"ets": 1672656998024,
"ver": "3.0",
"mid": "4a340ad0-0665-49b6-a1fa-a581dcac4550",
"actor": {
"id": "internal",
"type": "Consumer"
},
"context": {
"channel": "0126796199493140480",
"pdata": {
"id": "staging.sunbird.learning.service",
"pid": "learner-service",
"ver": "5.0.0"
},
"env": "Organisation",
"cdata": [
{
"id": "4a340ad0-0665-49b6-a1fa-a581dcac4550",
"type": "Request"
}
],
"rollup": {}
},
"edata": {
"level": "info",
"type": "Api_access",
"message": "{eid='LOG', edata={level=trace, requestid=4a340ad0-0665-49b6-a1fa-a581dcac4550, type=system, message=EXIT LOG: method : POST, url: /v1/org/search , For Operation : orgSearch, params=[{msgid=4a340ad0-0665-49b6-a1fa-a581dcac4550, errmsg=Invalid value null for parameter hashTagId. Please provide a valid value., resmsgid=4a340ad0-0665-49b6-a1fa-a581dcac4550, err=UOS_ORGSER0017, status=FAILED, responseCode=400}]}}",
"params": []
}
},
{
"eid": "LOG",
"ets": 1672657004961,
"ver": "3.0",
"mid": "f34112c7242a3e3a26f0015796b029c2",
"actor": {
"id": "internal",
"type": "Consumer"
},
"context": {
"channel": "0126796199493140480",
"pdata": {
"id": "staging.sunbird.learning.service",
"pid": "learner-service",
"ver": "5.0.0"
},
"env": "Organisation",
"cdata": [
{
"id": "f34112c7242a3e3a26f0015796b029c2",
"type": "Request"
}
],
"rollup": {}
},
"edata": {
"level": "info",
"type": "Api_access",
"message": "ElasticSearchRestHighImpl:search: calling search for index org_alias, with query = {\"from\":0,\"size\":250,\"query\":{\"bool\":{\"must\":[{\"term\":{\"isTenant.raw\":{\"value\":true,\"boost\":1.0}}},{\"term\":{\"slug.raw\":{\"value\":\"ntp\",\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[],\"excludes\":[]}}",
"params": []
}
},
{
"eid": "LOG",
"ets": 1672657006595,
"ver": "3.0",
"mid": "d23ff123-40f0-4262-a69b-b75b46d315a1",
"actor": {
"id": "930a3994-cbe7-4e84-936f-4974096af6f2",
"type": "Consumer"
},
"context": {
"channel": "0126796199493140480",
"pdata": {
"id": "staging.sunbird.learning.service",
"pid": "learner-service",
"ver": "5.0.0"
},
"env": "User",
"cdata": [
{
"id": "d23ff123-40f0-4262-a69b-b75b46d315a1",
"type": "Request"
}
],
"rollup": {}
},
"edata": {
"level": "info",
"type": "Api_access",
"message": "{eid='LOG', edata={level=trace, requestid=d23ff123-40f0-4262-a69b-b75b46d315a1, type=system, message=ENTRY LOG: method : GET, url: /v1/user/role/read/6ab35eea-01fd-4de0-8902-f68722caf859 , For Operation : getUserRolesById, params=[{id=null, userId=6ab35eea-01fd-4de0-8902-f68722caf859}]}}",
"params": []
}
},
{
"eid": "LOG",
"ets": 1672657006611,
"ver": "3.0",
"mid": "7d944b1c-a906-4082-b42a-905aa6b78a4e",
"actor": {
"id": "6ab35eea-01fd-4de0-8902-f68722caf859",
"type": "User"
},
"context": {
"channel": "0126796199493140480",
"pdata": {
"id": "staging.sunbird.learning.service",
"pid": "learner-service",
"ver": "5.0.0"
},
"env": "User",
"cdata": [
{
"id": "7d944b1c-a906-4082-b42a-905aa6b78a4e",
"type": "Request"
}
],
"rollup": {}
},
"edata": {
"level": "info",
"type": "Api_access",
"message": "{eid='LOG', edata={level=trace, requestid=7d944b1c-a906-4082-b42a-905aa6b78a4e, type=system, message=ENTRY LOG: method : GET, url: /v5/user/read/6ab35eea-01fd-4de0-8902-f68722caf859 , For Operation : getUserProfileV5, params=[{id=null, userId=6ab35eea-01fd-4de0-8902-f68722caf859}]}}",
"params": []
}
},
{
"eid": "LOG",
"ets": 1672657006620,
"ver": "3.0",
"mid": "7d944b1c-a906-4082-b42a-905aa6b78a4e",
"actor": {
"id": "6ab35eea-01fd-4de0-8902-f68722caf859",
"type": "User"
},
"context": {
"channel": "0126796199493140480",
"pdata": {
"id": "staging.sunbird.learning.service",
"pid": "learner-service",
"ver": "5.0.0"
},
"env": "User",
"cdata": [
{
"id": "7d944b1c-a906-4082-b42a-905aa6b78a4e",
"type": "Request"
}
],
"rollup": {}
},
"edata": {
"level": "info",
"type": "Api_access",
"message": "Cassandra query : SELECT * FROM sunbird.user_roles WHERE userId=?;",
"params": []
}
},
{
"eid": "LOG",
"ets": 1672657006645,
"ver": "3.0",
"mid": "7d944b1c-a906-4082-b42a-905aa6b78a4e",
"actor": {
"id": "6ab35eea-01fd-4de0-8902-f68722caf859",
"type": "User"
},
"context": {
"channel": "0126796199493140480",
"pdata": {
"id": "staging.sunbird.learning.service",
"pid": "learner-service",
"ver": "5.0.0"
},
"env": "User",
"cdata": [
{
"id": "7d944b1c-a906-4082-b42a-905aa6b78a4e",
"type": "Request"
}
],
"rollup": {}
},
"edata": {
"level": "info",
"type": "Api_access",
"message": "",
"params": [
{
"method": "GET"
},
{
"url": "/v5/user/read/6ab35eea-01fd-4de0-8902-f68722caf859"
},
{
"duration": 0
},
{
"status": "OK"
}
]
}
},
{
"eid": "LOG",
"ets": 1672657007238,
"ver": "3.0",
"mid": "d4d34fde-c407-efb6-03bd-9f892ca0f114",
"actor": {
"id": "6ab35eea-01fd-4de0-8902-f68722caf859",
"type": "User"
},
"context": {
"channel": "0126796199493140480",
"pdata": {
"id": "staging.sunbird.portal",
"pid": "learner-service",
"ver": "5.0.0"
},
"env": "User",
"did": "d904c90d9f81ddac20141b94ddd606a0",
"cdata": [
{
"id": "d4d34fde-c407-efb6-03bd-9f892ca0f114",
"type": "Request"
}
],
"rollup": {}
},
"edata": {
"level": "info",
"type": "Api_access",
"message": "Cassandra query : SELECT * FROM sunbird.user WHERE id=?;",
"params": []
}
}
],
// Dataset name sent alongside the sample events.
// NOTE(review): "financial_transactions" does not match the telemetry-style
// events above; presumably a placeholder name. Confirm.
"config": {
"dataset": "financial_transactions"
}
}
}),
});
// Parse the response body as JSON.
const data = await response.json();
Dataset Status Transition
This API allows you to transition a dataset from one status to another. The allowed status transitions are Draft to ReadyToPublish, ReadyToPublish to Live, and Live to Retired; the API can also be used to delete a dataset.
OK
// Example: change a dataset's status by POSTing a JSON request envelope to
// /v2/datasets/status-transition. This sample requests "ReadyToPublish" for
// dataset "telemetry-events". `response` is the raw fetch Response and `data`
// is its body parsed as JSON.
const statusTransitionPayload = {
  id: 'api.datasets.status-transition',
  ver: 'v2',
  ts: '2024-04-19T12:58:47+05:30',
  params: {
    // Full 36-character UUID; the sample previously dropped the final
    // character, leaving a malformed message id.
    msgid: '4a7f14c3-d61e-4d4f-be78-181834eeff6d',
  },
  request: {
    dataset_id: 'telemetry-events',
    status: 'ReadyToPublish',
  },
};

const response = await fetch('http://localhost:3000/v2/datasets/status-transition', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(statusTransitionPayload),
});
const data = await response.json();
Clone Dataset
OK
// Example: copy a dataset by POSTing a JSON request envelope to
// /v2/datasets/copy. The request names a source dataset (flagged isLive) and
// a destination dataset id. `response` is the raw fetch Response and `data`
// is its body parsed as JSON.
const copyDatasetPayload = {
  id: 'api.datasets.copy',
  ver: 'v2',
  ts: '2024-05-21T14:30:00Z',
  params: {
    msgid: '127384e4a-a051-4a9f-9b3f-a64a8034fad7',
  },
  request: {
    source: {
      datasetId: 'dataset-telemetry',
      isLive: true,
    },
    destination: {
      datasetId: 'bew-copy-live2',
    },
  },
};

const response = await fetch('http://localhost:3000/v2/datasets/copy', {
  method: 'POST',
  headers: { 'Content-Type': 'application/json' },
  body: JSON.stringify(copyDatasetPayload),
});
const data = await response.json();
Last updated