Dataset Management APIs

List of APIs to manage datasets

Dataset APIs

Dataset APIs allow you to manage datasets: creating new datasets, updating and listing existing datasets, and retrieving the metadata of a specific dataset.

Create Dataset

This API allows you to create new datasets used by the analytical data source.

POST http://localhost:3000/v2/datasets/create
Header parameters
Body
object
Response

OK

Headers
Body
object
Request
// Example: create a dataset by POSTing a JSON request envelope to the create endpoint.
const createDatasetUrl = 'http://localhost:3000/v2/datasets/create';

// Request envelope; the dataset definition itself is carried under "request".
const createDatasetPayload = {
  "id": "api.datasets.create",
  "ver": "v2",
  "ts": "2024-04-10T16:10:50+05:30",
  "params": {
    "msgid": "4a7f14c3-d61e-4d4f-be78-181834eeff6d"
  },
  "request": {
    "dataset_id": "telemetry_record-t4",
    "type": "event",
    "name": "sb-telemetry",
    "validation_config": {
      "validate": true,
      "mode": "Strict"
    },
    "extraction_config": {
      "is_batch_event": true,
      "extraction_key": "events",
      "dedup_config": {
        "drop_duplicates": true,
        "dedup_key": "id"
      }
    },
    "dedup_config": {
      "drop_duplicates": true,
      "dedup_key": "mid"
    },
    "data_schema": {
      "$schema": "https://json-schema.org/draft/2020-12/schema",
      "type": "object",
      "properties": {
        "mid": {
          "type": "string",
          "arrival_format": "text",
          "data_type": "string"
        },
        "ets": {
          "type": "integer",
          "arrival_format": "number",
          "data_type": "epoch"
        },
        "eid": {
          "type": "string",
          "arrival_format": "text",
          "data_type": "string"
        }
      },
      "additionalProperties": true
    },
    "denorm_config": {
      "denorm_fields": [
        {
          "denorm_key": "eid",
          "denorm_out_field": "userdata",
          "dataset_id": "master-telemetry"
        }
      ]
    },
    "transformations_config": [
      {
        "field_key": "email",
        "transformation_function": {
          "type": "mask",
          "expr": "mid",
          "datatype": "string",
          "category": "pii"
        },
        "mode": "Strict"
      }
    ],
    "tags": [
      "tag1"
    ]
  }
};

const response = await fetch(createDatasetUrl, {
  method: 'POST',
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify(createDatasetPayload),
});
// Parse the JSON body of the response.
const data = await response.json();

Update Dataset

This API allows you to update existing datasets, including adding or removing the denorm fields used by the analytical data source. You can also add, remove, or update transformations and connectors.

PATCH http://localhost:3000/v2/datasets/update
Header parameters
Body
object
Response

OK

Headers
Body
object
Request
// Example: update a dataset by sending a PATCH request with a JSON request envelope.
const updateDatasetUrl = 'http://localhost:3000/v2/datasets/update';

// In this payload, each entry of denorm_fields, transformations_config and
// connectors_config pairs a "value" object with an "action" ("upsert" or "remove").
const updateDatasetPayload = {
  "id": "api.datasets.update",
  "ver": "v2",
  "ts": "2024-04-10T16:10:50+05:30",
  "params": {
    "msgid": "4a7f14c3-d61e-4d4f-be78-181834eeff6d"
  },
  "request": {
    "dataset_id": "telemetry_record-t4",
    "version_key": "1721135455988",
    "name": "sb-telemetry",
    "validation_config": {
      "validate": true,
      "mode": "Strict"
    },
    "extraction_config": {
      "is_batch_event": true,
      "extraction_key": "events",
      "dedup_config": {
        "drop_duplicates": true,
        "dedup_key": "ipid"
      }
    },
    "dedup_config": {
      "drop_duplicates": true,
      "dedup_key": "mid"
    },
    "data_schema": {
      "$schema": "https://json-schema.org/draft/2020-12/schema",
      "type": "object",
      "properties": {
        "midpid": {
          "type": "string",
          "arrival_format": "text",
          "data_type": "string"
        },
        "miduwi": {
          "type": "integer",
          "arrival_format": "number",
          "data_type": "epoch"
        },
        "mid": {
          "type": "string",
          "arrival_format": "text",
          "data_type": "string"
        },
        "sid": {
          "type": "string",
          "arrival_format": "text",
          "data_type": "string"
        }
      },
      "additionalProperties": true
    },
    "denorm_config": {
      "denorm_fields": [
        {
          "value": {
            "denorm_key": "eid",
            "denorm_out_field": "userdata"
          },
          "action": "remove"
        },
        {
          "value": {
            "denorm_key": "eid",
            "denorm_out_field": "edata",
            "dataset_id": "trip-details"
          },
          "action": "upsert"
        }
      ]
    },
    "transformations_config": [
      {
        "value": {
          "field_key": "email",
          "transformation_function": {
            "type": "mask",
            "expr": "mid",
            "datatype": "string",
            "category": "pii"
          },
          "mode": "Strict"
        },
        "action": "upsert"
      },
      {
        "value": {
          "field_key": "email_id",
          "transformation_function": {
            "type": "mask",
            "expr": "mid",
            "datatype": "string",
            "category": "pii"
          },
          "mode": "Strict"
        },
        "action": "remove"
      }
    ],
    "tags": [],
    "connectors_config": [
      {
        "value": {
          "connector_id": "jdbc",
          "connector_config": {
            "source_database_type": "postgresql",
            "source_database_host": "postgresql-hl.postgresql.svc.cluster.local.master",
            "source_database_port": 5432,
            "source_database_name": "obsrv_sample_datasets_1",
            "source_database_username": "postgres",
            "source_database_pwd": "postgres",
            "table": "new_york_taxi_data",
            "timestamp-column": "tpep_pickup_datetime",
            "batch-size": 100,
            "max-batches": 2
          },
          "operations_config": {
            "polling_interval": "periodic",
            "schedule": "twice"
          }
        },
        "action": "upsert"
      }
    ]
  }
};

const response = await fetch(updateDatasetUrl, {
  method: 'PATCH',
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify(updateDatasetPayload),
});
// Parse the JSON body of the response.
const data = await response.json();

Read Dataset

This API allows you to read the dataset identified by the requested dataset_id. You can request specific fields and a specific dataset status through the request parameters. By default, the API returns the dataset with status "Live". The API also accepts the parameter mode=edit to read the draft dataset; if no draft dataset is found, it creates one from the live dataset and returns the dataset details.

GET http://localhost:3000/v2/datasets/read/{dataset_id}
Path parameters
dataset_id* string

Unique identifier for the dataset

Query parameters
Header parameters
Response

OK

Headers
Body
object
Request
// Example: read a dataset by ID with a GET request.
// "{dataset_id}" in the documented path is a placeholder, so the real ID is
// substituted here instead of sending the braces literally. Replace the sample
// value below with the ID of the dataset you want to read.
const datasetId = 'telemetry-events';
// encodeURIComponent keeps IDs containing reserved URL characters inside a single path segment.
const response = await fetch(`http://localhost:3000/v2/datasets/read/${encodeURIComponent(datasetId)}`, {
    method: 'GET',
    headers: {},
});
// Parse the JSON body of the response.
const data = await response.json();

List Datasets

This API allows you to list all datasets. You can filter the results by dataset status and type.

POST http://localhost:3000/v2/datasets/list
Body
object
Response

OK

Headers
Body
object
Request
// Example: list datasets by POSTing a request envelope that carries a status filter of ["Live"].
const listDatasetsUrl = 'http://localhost:3000/v2/datasets/list';

const listDatasetsPayload = {
  "id": "api.datasets.list",
  "ver": "v2",
  "ts": "2024-04-10T16:10:50+05:30",
  "params": {
    "msgid": "4a7f14c3-d61e-4d4f-be78-181834eeff6d"
  },
  "request": {
    "filters": {
      "status": [
        "Live"
      ]
    }
  }
};

const response = await fetch(listDatasetsUrl, {
  method: 'POST',
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify(listDatasetsPayload),
});
// Parse the JSON body of the response.
const data = await response.json();

Generate Presigned URLs

This API generates presigned URLs for uploading files to, or downloading files from, cloud storage.

POST http://localhost:3000/v2/files/generate-url
Header parameters
Body
object
Response

OK

Headers
Body
object
Request
// Example: request presigned URLs for the listed files with "write" access.
const response = await fetch('http://localhost:3000/v2/files/generate-url', {
    method: 'POST',
    headers: {
      "Content-Type": "application/json"
    },
    body: JSON.stringify({
      "id": "api.files.generate-url",
      "ver": "v2",
      "ts": "2024-04-19T12:58:47+05:30",
      "params": {
        // Full 36-character UUID; the sample previously dropped the final character.
        "msgid": "4a7f14c3-d61e-4d4f-be78-181834eeff6d"
      },
      "request": {
        "files": [
          "telemetry.json",
          "school_data.json"
        ],
        "access": "write"
      }
    }),
});
// Parse the JSON body of the response.
const data = await response.json();

Dataset Status Transition

This API allows you to transition a dataset from one status to another. The allowed status transitions are Draft to ReadyToPublish, ReadyToPublish to Live, and Live to Retired. You can also use it to delete a dataset.

POST http://localhost:3000/v2/datasets/status-transition
Body
object
Response

OK

Body
object
Request
// Example: request a status transition of dataset "telemetry-events" to "ReadyToPublish".
const response = await fetch('http://localhost:3000/v2/datasets/status-transition', {
    method: 'POST',
    headers: {
      "Content-Type": "application/json"
    },
    body: JSON.stringify({
      "id": "api.datasets.status-transition",
      "ver": "v2",
      "ts": "2024-04-19T12:58:47+05:30",
      "params": {
        // Full 36-character UUID; the sample previously dropped the final character.
        "msgid": "4a7f14c3-d61e-4d4f-be78-181834eeff6d"
      },
      "request": {
        "dataset_id": "telemetry-events",
        "status": "ReadyToPublish"
      }
    }),
});
// Parse the JSON body of the response.
const data = await response.json();

Schema Generation

This API generates the data schema for the given dataset events.

POST http://localhost:3000/v2/datasets/dataschema
Header parameters
Body
object
Response

OK

Headers
Body
object
Request
// Example: POST a batch of sample event objects to the dataschema endpoint.
// The payload below is a verbatim sample fixture; the long "uri" and "message"
// strings are opaque sample data and are sent exactly as written.
const response = await fetch('http://localhost:3000/v2/datasets/dataschema', {
    method: 'POST',
    headers: {
      "Content-Type": "application/json"
    },
    body: JSON.stringify({
      "id": "api.datasets.dataschema",
      "ver": "v2",
      "ts": "2024-04-10T16:10:50+05:30",
      "params": {
        "msgid": "4a7f14c3-d61e-4d4f-be78-181834eeff6d"
      },
      "request": {
        // request.data: array of sample event objects submitted to the endpoint.
        "data": [
          {
            "eid": "IMPRESSION",
            "ets": 1672657002221,
            "ver": "3.0",
            "mid": "IMPRESSION:2b5834e196f485c17c4e49d292af43c0",
            "actor": {
              "id": "0c45959486f579c24854d40a225d6161",
              "type": "User"
            },
            "context": {
              "channel": "01268904781886259221",
              "pdata": {
                "id": "staging.diksha.portal",
                "ver": "5.1.0",
                "pid": "sunbird-portal"
              },
              "env": "public",
              "sid": "23850c90-8a8c-11ed-95d0-276800e1048c",
              "did": "0c45959486f579c24854d40a225d6161",
              "cdata": [],
              "rollup": {
                "l1": "01268904781886259221"
              },
              "uid": "anonymous"
            },
            "object": {},
            "tags": [
              "01268904781886259221"
            ],
            "edata": {
              "type": "view",
              "pageid": "login",
              "subtype": "pageexit",
              "uri": "https://staging.sunbirded.org/auth/realms/sunbird/protocol/openid-connect/auth?client_id=portal&state=254efd70-6b89-4f7d-868b-5c957f54174e&redirect_uri=https%253A%252F%252Fstaging.sunbirded.org%252Fresources%253Fboard%253DState%252520(Andhra%252520Pradesh)%2526medium%253DEnglish%2526gradeLevel%253DClass%2525201%2526%2526id%253Dap_k-12_1%2526selectedTab%253Dhome%2526auth_callback%253D1&scope=openid&response_type=code&version=4",
              "visits": []
            },
            "syncts": 1672657005814,
            "@timestamp": "2023-01-02T10:56:45.814Z",
            "flags": {
              "ex_processed": true
            }
          },
          {
            "eid": "IMPRESSION",
            "ets": 1672656997928,
            "ver": "3.0",
            "mid": "50263f0f-c2d5-4b15-95f4-5384c537f6cc",
            "actor": {
              "id": "internal",
              "type": "Consumer"
            },
            "context": {
              "channel": "0126796199493140480",
              "pdata": {
                "id": "staging.sunbird.learning.service",
                "pid": "learner-service",
                "ver": "5.0.0"
              },
              "env": "Organisation",
              "cdata": [
                {
                  "id": "50263f0f-c2d5-4b15-95f4-5384c537f6cc",
                  "type": "Request"
                }
              ],
              "rollup": {}
            },
            "edata": {
              "level": "info",
              "type": "Api_access",
              "message": "",
              "params": [
                {
                  "method": "POST"
                },
                {
                  "url": "/v1/org/search"
                },
                {
                  "duration": 0
                },
                {
                  "status": "OK"
                }
              ]
            }
          },
          {
            "eid": "LOG",
            "ets": 1672656998024,
            "ver": "3.0",
            "mid": "4a340ad0-0665-49b6-a1fa-a581dcac4550",
            "actor": {
              "id": "internal",
              "type": "Consumer"
            },
            "context": {
              "channel": "0126796199493140480",
              "pdata": {
                "id": "staging.sunbird.learning.service",
                "pid": "learner-service",
                "ver": "5.0.0"
              },
              "env": "Organisation",
              "cdata": [
                {
                  "id": "4a340ad0-0665-49b6-a1fa-a581dcac4550",
                  "type": "Request"
                }
              ],
              "rollup": {}
            },
            "edata": {
              "level": "info",
              "type": "Api_access",
              "message": "{eid='LOG', edata={level=trace, requestid=4a340ad0-0665-49b6-a1fa-a581dcac4550, type=system, message=EXIT LOG: method : POST, url: /v1/org/search , For Operation : orgSearch, params=[{msgid=4a340ad0-0665-49b6-a1fa-a581dcac4550, errmsg=Invalid value null for parameter hashTagId. Please provide a valid value., resmsgid=4a340ad0-0665-49b6-a1fa-a581dcac4550, err=UOS_ORGSER0017, status=FAILED, responseCode=400}]}}",
              "params": []
            }
          },
          {
            "eid": "LOG",
            "ets": 1672657004961,
            "ver": "3.0",
            "mid": "f34112c7242a3e3a26f0015796b029c2",
            "actor": {
              "id": "internal",
              "type": "Consumer"
            },
            "context": {
              "channel": "0126796199493140480",
              "pdata": {
                "id": "staging.sunbird.learning.service",
                "pid": "learner-service",
                "ver": "5.0.0"
              },
              "env": "Organisation",
              "cdata": [
                {
                  "id": "f34112c7242a3e3a26f0015796b029c2",
                  "type": "Request"
                }
              ],
              "rollup": {}
            },
            "edata": {
              "level": "info",
              "type": "Api_access",
              "message": "ElasticSearchRestHighImpl:search: calling search for index org_alias, with query = {\"from\":0,\"size\":250,\"query\":{\"bool\":{\"must\":[{\"term\":{\"isTenant.raw\":{\"value\":true,\"boost\":1.0}}},{\"term\":{\"slug.raw\":{\"value\":\"ntp\",\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[],\"excludes\":[]}}",
              "params": []
            }
          },
          {
            "eid": "LOG",
            "ets": 1672657006595,
            "ver": "3.0",
            "mid": "d23ff123-40f0-4262-a69b-b75b46d315a1",
            "actor": {
              "id": "930a3994-cbe7-4e84-936f-4974096af6f2",
              "type": "Consumer"
            },
            "context": {
              "channel": "0126796199493140480",
              "pdata": {
                "id": "staging.sunbird.learning.service",
                "pid": "learner-service",
                "ver": "5.0.0"
              },
              "env": "User",
              "cdata": [
                {
                  "id": "d23ff123-40f0-4262-a69b-b75b46d315a1",
                  "type": "Request"
                }
              ],
              "rollup": {}
            },
            "edata": {
              "level": "info",
              "type": "Api_access",
              "message": "{eid='LOG', edata={level=trace, requestid=d23ff123-40f0-4262-a69b-b75b46d315a1, type=system, message=ENTRY LOG: method : GET, url: /v1/user/role/read/6ab35eea-01fd-4de0-8902-f68722caf859 , For Operation : getUserRolesById, params=[{id=null, userId=6ab35eea-01fd-4de0-8902-f68722caf859}]}}",
              "params": []
            }
          },
          {
            "eid": "LOG",
            "ets": 1672657006611,
            "ver": "3.0",
            "mid": "7d944b1c-a906-4082-b42a-905aa6b78a4e",
            "actor": {
              "id": "6ab35eea-01fd-4de0-8902-f68722caf859",
              "type": "User"
            },
            "context": {
              "channel": "0126796199493140480",
              "pdata": {
                "id": "staging.sunbird.learning.service",
                "pid": "learner-service",
                "ver": "5.0.0"
              },
              "env": "User",
              "cdata": [
                {
                  "id": "7d944b1c-a906-4082-b42a-905aa6b78a4e",
                  "type": "Request"
                }
              ],
              "rollup": {}
            },
            "edata": {
              "level": "info",
              "type": "Api_access",
              "message": "{eid='LOG', edata={level=trace, requestid=7d944b1c-a906-4082-b42a-905aa6b78a4e, type=system, message=ENTRY LOG: method : GET, url: /v5/user/read/6ab35eea-01fd-4de0-8902-f68722caf859 , For Operation : getUserProfileV5, params=[{id=null, userId=6ab35eea-01fd-4de0-8902-f68722caf859}]}}",
              "params": []
            }
          },
          {
            "eid": "LOG",
            "ets": 1672657006620,
            "ver": "3.0",
            "mid": "7d944b1c-a906-4082-b42a-905aa6b78a4e",
            "actor": {
              "id": "6ab35eea-01fd-4de0-8902-f68722caf859",
              "type": "User"
            },
            "context": {
              "channel": "0126796199493140480",
              "pdata": {
                "id": "staging.sunbird.learning.service",
                "pid": "learner-service",
                "ver": "5.0.0"
              },
              "env": "User",
              "cdata": [
                {
                  "id": "7d944b1c-a906-4082-b42a-905aa6b78a4e",
                  "type": "Request"
                }
              ],
              "rollup": {}
            },
            "edata": {
              "level": "info",
              "type": "Api_access",
              "message": "Cassandra query : SELECT * FROM sunbird.user_roles WHERE userId=?;",
              "params": []
            }
          },
          {
            "eid": "LOG",
            "ets": 1672657006645,
            "ver": "3.0",
            "mid": "7d944b1c-a906-4082-b42a-905aa6b78a4e",
            "actor": {
              "id": "6ab35eea-01fd-4de0-8902-f68722caf859",
              "type": "User"
            },
            "context": {
              "channel": "0126796199493140480",
              "pdata": {
                "id": "staging.sunbird.learning.service",
                "pid": "learner-service",
                "ver": "5.0.0"
              },
              "env": "User",
              "cdata": [
                {
                  "id": "7d944b1c-a906-4082-b42a-905aa6b78a4e",
                  "type": "Request"
                }
              ],
              "rollup": {}
            },
            "edata": {
              "level": "info",
              "type": "Api_access",
              "message": "",
              "params": [
                {
                  "method": "GET"
                },
                {
                  "url": "/v5/user/read/6ab35eea-01fd-4de0-8902-f68722caf859"
                },
                {
                  "duration": 0
                },
                {
                  "status": "OK"
                }
              ]
            }
          },
          {
            "eid": "LOG",
            "ets": 1672657007238,
            "ver": "3.0",
            "mid": "d4d34fde-c407-efb6-03bd-9f892ca0f114",
            "actor": {
              "id": "6ab35eea-01fd-4de0-8902-f68722caf859",
              "type": "User"
            },
            "context": {
              "channel": "0126796199493140480",
              "pdata": {
                "id": "staging.sunbird.portal",
                "pid": "learner-service",
                "ver": "5.0.0"
              },
              "env": "User",
              "did": "d904c90d9f81ddac20141b94ddd606a0",
              "cdata": [
                {
                  "id": "d4d34fde-c407-efb6-03bd-9f892ca0f114",
                  "type": "Request"
                }
              ],
              "rollup": {}
            },
            "edata": {
              "level": "info",
              "type": "Api_access",
              "message": "Cassandra query : SELECT * FROM sunbird.user WHERE id=?;",
              "params": []
            }
          }
        ],
        // request.config: carries the "dataset" value sent alongside the events.
        "config": {
          "dataset": "financial_transactions"
        }
      }
    }),
});
// Parse the JSON body of the response.
const data = await response.json();

Dataset Status Transition

This API allows you to transition a dataset from one status to another. The allowed status transitions are Draft to ReadyToPublish, ReadyToPublish to Live, and Live to Retired. You can also use it to delete a dataset.

POST http://localhost:3000/v2/datasets/status-transition
Body
object
Response

OK

Body
object
Request
// Example: request a status transition of dataset "telemetry-events" to "ReadyToPublish".
const response = await fetch('http://localhost:3000/v2/datasets/status-transition', {
    method: 'POST',
    headers: {
      "Content-Type": "application/json"
    },
    body: JSON.stringify({
      "id": "api.datasets.status-transition",
      "ver": "v2",
      "ts": "2024-04-19T12:58:47+05:30",
      "params": {
        // Full 36-character UUID; the sample previously dropped the final character.
        "msgid": "4a7f14c3-d61e-4d4f-be78-181834eeff6d"
      },
      "request": {
        "dataset_id": "telemetry-events",
        "status": "ReadyToPublish"
      }
    }),
});
// Parse the JSON body of the response.
const data = await response.json();

Clone Dataset

POST http://localhost:3000/v2/datasets/copy
Header parameters
Body
object
Response

OK

Headers
Body
object
Request
// Example: POST a copy request naming a source dataset (flagged "isLive": true)
// and the destination dataset ID.
const response = await fetch('http://localhost:3000/v2/datasets/copy', {
    method: 'POST',
    headers: {
      "Content-Type": "application/json"
    },
    body: JSON.stringify({
      "id": "api.datasets.copy",
      "ver": "v2",
      "ts": "2024-05-21T14:30:00Z",
      "params": {
        // Well-formed 8-4-4-4-12 UUID; the sample previously had a stray ninth
        // character in the first group. NOTE(review): any valid UUID works here.
        "msgid": "27384e4a-a051-4a9f-9b3f-a64a8034fad7"
      },
      "request": {
        "source": {
          "datasetId": "dataset-telemetry",
          "isLive": true
        },
        "destination": {
          "datasetId": "bew-copy-live2"
        }
      }
    }),
});
// Parse the JSON body of the response.
const data = await response.json();

Last updated