Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Raw stats SUPREMM refactoring #1343

Merged
merged 6 commits into from
Jun 23, 2020
Merged
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
231 changes: 204 additions & 27 deletions etl/js/lib/etl_profile.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ var events = require('events'),
config = require('../config.js');
var etlv2 = require('./etlv2.js');
var fs = require('fs');
var sorting = require('./sorting.js');

var ETLProfile = module.exports = function (etlProfile) {
etlProfile.init();
Expand Down Expand Up @@ -215,6 +216,28 @@ ETLProfile.prototype.getTables = function () {
return tables;
}

/**
* Get the raw statistics configuration data.
*
* @return {object}
*/
ETLProfile.prototype.getRawStatisticsConfiguration = function () {
var rawStatsConfig = this.schema.rawStatistics;
var tables = this.getTables();

for (var t in tables) {
if ({}.hasOwnProperty.call(tables, t)) {
var tableName = tables[t].meta.schema + '.' + tables[t].name;

if (tableName === rawStatsConfig.table) {
util._extend(rawStatsConfig.fields, tables[t].columns);
}
}
}

return rawStatsConfig;
};

/**
* Write the documentation for the autogenerated tables as an HTML page.
*
Expand Down Expand Up @@ -500,6 +523,118 @@ var xdmodIntegrator = function(realmName, realmConfigRoot) {

};

/**
* Raw statistics integrator.
*
* @constructor
* @param {string} realmName - The name/ID of the realm.
* @param {string} realmDisplay - The display name of the realm.
* @param {number} realmOrder - The order prefix to prepend to the raw statistics configuration file.
*/
var RawStatsIntegrator = function (realmName, realmDisplay, realmOrder) {
/** @var {string} The realm/ID name. */
this.realmName = realmName;

/** @var {string} The realm display name. */
this.realmDisplay = realmDisplay;

/** @var {number} The raw statistics file order prefix. */
this.realmOrder = realmOrder;

/** @var {array} Raw statistics tables. */
this.tables = [];

/** @var {array} Raw statistics fields. */
this.fields = [];

/**
* @var {object} Used to lookup table alias.
* @private
*/
this.tableAliases = {};

/**
* Get a unique identifier for a table definition.
*
* @private
* @param {string} schemaName - The name of the database schema.
* @param {string} tableName - The name of the database table.
* @param {string} foreignTableAlias - Alias of table this table is joined to.
* @param {string} foreignKey - Foreign key used to join the table.
*/
this.getTableAliasKey = function (schemaName, tableName, foreignTableAlias, foreignKey) {
// This assumes that none of these values contain any dot characters.
return [schemaName, tableName, foreignTableAlias, foreignKey].join('.');
};

/**
* Add a table to the raw statistics configuration.
*
* @param {object} tableDef - Table definition.
* @param {string} tableDef.schema - Name of the database schema.
* @param {string} tableDef.name - Name of the database table.
* @param {string} tableDef.alias - Alias to use for this table.
* @param {object} tableDef.join - Join definition.
* @param {string} tableDef.join.primaryKey - Primary key of table this table is joined to.
* @param {string} tableDef.join.foreignTableAlias - Alias
* @param {string} tableDef.join.foreignKey - Foreign key used in join.
*/
this.addTable = function (tableDef) {
this.tables.push(tableDef);
var key = this.getTableAliasKey(tableDef.schema, tableDef.name, tableDef.join.foreignTableAlias, tableDef.join.foreignKey);
this.tableAliases[key] = tableDef.alias;
};

/**
* Add a field to the raw statistics configuration.
*
* @param {object} field - Field definition.
*/
this.addField = function (fieldDef) {
this.fields.push(fieldDef);
};

/**
* Find the alias of a table that's already been added.
*
* @param {string} schemaName - The name of the database schema.
* @param {string} tableName - The name of the database table.
* @param {string} foreignTableAlias - Alias of table this table is joined to.
* @param {string} foreignKey - Foreign key used to join the table.
* @return {string|null} The table's alias or null if the table has not been added.
*/
this.getTableAlias = function (schemaName, tableName, foreignTableAlias, foreignKey) {
var key = this.getTableAliasKey(schemaName, tableName, foreignTableAlias, foreignKey);
return this.tableAliases[key] ? this.tableAliases[key] : null;
};

/**
* Write raw statistics configuration to file.
*/
this.write = function () {
var rawStats = {
'+realms': [
{
name: this.realmName,
display: this.realmDisplay
}
]
};

this.fields.sort(sorting.dynamicSortMultiple('dtype', 'group', 'units', 'name'));

rawStats[realmName] = {
tables: this.tables,
fields: this.fields
};

fs.writeFileSync(
config.xdmodBuildConfigDir + '/rawstatistics.d/' + this.realmOrder + '_' + this.realmName.toLowerCase() + '.json',
JSON.stringify(rawStats, null, 4)
);
};
};

var extractandsubst = function(column, item) {
if( !column.hasOwnProperty(item) ) {
return null;
Expand Down Expand Up @@ -671,43 +806,85 @@ ETLProfile.prototype.integrateWithXDMoD = function () {
xdmodInteg.mkdirandwrite(config.xdmodBuildConfigDir + '/datawarehouse.d/ref/', realmName.toLowerCase() + '-statistics', statistics);
xdmodInteg.mkdirandwrite(config.xdmodBuildConfigDir + '/datawarehouse.d/ref/', realmName.toLowerCase() + '-group-bys', groupBys);
xdmodInteg.write();
}
var rawstats = {};
var tables = this.getTables();
for( var t in tables) {
var tableName = tables[t].meta.schema + "." + tables[t].name;
if( !(tableName in rawstats) ) {
rawstats[tableName] = [];
}
var columns = tables[t].columns;
for( var c in columns) {
var dtype = columns[c].dtype ? columns[c].dtype : (columns[c].queries ? "foreignkey" : "statistic" );
var group = columns[c].group ? columns[c].group : "misc";
var visibility = columns[c].visibility ? columns[c].visibility : 'public';
var batchExport = columns[c].batchExport ? columns[c].batchExport : false;

var name = extractandsubst(columns[c], "name");
if(!name) {
name = Namealize(c, true);
}

var rawStatsConfig = this.getRawStatisticsConfiguration();
var rawStatsInteg = new RawStatsIntegrator(rawStatsConfig.realmName, rawStatsConfig.realmDisplay, rawStatsConfig.realmOrder);
var tableIndex = 1;

for (var key in rawStatsConfig.fields) {
if ({}.hasOwnProperty.call(rawStatsConfig.fields, key)) {
var col = rawStatsConfig.fields[key];
var columnName = key;
var alias = key;
var dtype = col.dtype;
if (!dtype) {
dtype = col.queries ? 'foreignkey' : 'statistic';
}
var group = col.group ? col.group : 'misc';
var visibility = col.visibility ? col.visibility : 'public';
var batchExport = col.batchExport ? col.batchExport : false;

var name = extractandsubst(col, 'name');
if (!name) {
name = Namealize(key);
}

rawstats[tableName].push({
key: c,
// Default to using the fact table, but override for foreign
// key dtype.
var tableAlias = 'jf';

if (dtype === 'foreignkey') {
if (!col.join) {
continue;
}

var join = col.join;
var tableSchema = join.schema;
var tableName = join.table;
var foreignKey = join.foreignKey ? join.foreignKey : key;
alias = name;
columnName = join.column ? join.column : 'name';

tableAlias = rawStatsInteg.getTableAlias(tableSchema, tableName, 'jf', foreignKey);

if (tableAlias === null) {
tableAlias = 'ft' + tableIndex;
++tableIndex;

rawStatsInteg.addTable({
schema: tableSchema,
name: tableName,
alias: tableAlias,
join: {
// All tables currently have primary key 'id'
// and are joined to the fact table 'jf'.
primaryKey: 'id',
foreignTableAlias: 'jf',
foreignKey: foreignKey
}
});
}
}

rawStatsInteg.addField({
key: key,
alias: alias,
name: name,
units: columns[c].unit,
per: columns[c].per,
documentation: columns[c].comments,
tableAlias: tableAlias,
column: columnName,
dtype: dtype,
units: col.unit,
per: col.per,
documentation: col.comments,
visibility: visibility,
batchExport: batchExport,
group: group
});
}
var sorting = require("./sorting.js");
rawstats[tableName].sort(sorting.dynamicSortMultiple("dtype", "group", "units", "name"));
}
var rawStatisticsConfigFile = config.xdmodBuildConfigDir + '/rawstatisticsconfig.json';
fs.writeFileSync(rawStatisticsConfigFile, JSON.stringify(rawstats, null, 4));

rawStatsInteg.write();
} catch (exception) {
self.emit('error', util.inspect(exception));
}
Expand Down