Skip to content

Commit

Permalink
feat(rule): UNEXPECTED_DATA rule providing
Browse files Browse the repository at this point in the history
Closes #67
  • Loading branch information
buchslava committed Oct 17, 2016
1 parent f1eddd8 commit ae31db9
Show file tree
Hide file tree
Showing 20 changed files with 505 additions and 49 deletions.
35 changes: 35 additions & 0 deletions doc/rules/UNEXPECTED_DATA.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# UNEXPECTED_DATA

## Rule test folders

`test/fixtures/rules-cases/unexpected-data/indexed`
`test/fixtures/rules-cases/unexpected-data/indexless`

## Description

An issue is raised under this rule when a file's name and header are valid but its content is not: for example, when a data row does not contain the number of fields declared by the header.

## Examples of correct data

```
ddf--concepts.csv
concept,concept_type,domain,name
name,string,,
geo,entity_domain,,
region,entity_set,geo,Region
country,entity_set,geo,Country
capital,entity_set,geo,Capital
pop,measure,geo,Population
year,time,,year
```

## Examples of incorrect data

```
ddf--concepts.csv
concept,concept_type,domain,name
foo
geo,entity_domain,,
```
47 changes: 47 additions & 0 deletions lib/data/csv-checker.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
'use strict';

const _ = require('lodash');
const fs = require('fs');
const CsvParser = require('babyparse');

/**
 * Turn the parser's raw error list into row-level error records.
 *
 * Only errors carrying a non-negative `row` are kept; an error without a
 * `row` fails the `>= 0` comparison and is therefore dropped as well.
 *
 * @param {{errors: Array<Object>, data: Array}} parsedCsv parse result
 * @returns {Array<{message: *, row: number, type: string, data: *}>}
 *   one record per kept error, with `data` taken from `parsedCsv.data[row]`
 */
const getErrors = parsedCsv => {
  const rowErrors = [];

  parsedCsv.errors.forEach(parseError => {
    const rowNumber = parseError.row;

    if (!(rowNumber >= 0)) {
      return;
    }

    rowErrors.push({
      message: parseError.message,
      row: rowNumber,
      type: `${parseError.type}/${parseError.code}`,
      data: parsedCsv.data[rowNumber]
    });
  });

  return rowErrors;
};

/**
 * Reads a CSV file and collects the row-level parse errors found in it.
 *
 * After `check()` has completed:
 *  - `errors` holds the records produced by `getErrors` for the parsed file;
 *  - `error` holds the file-read failure, or null when the file was read.
 */
class CsvChecker {
  /**
   * @param {string} filePath path of the CSV file to check
   */
  constructor(filePath) {
    this.filePath = filePath;
    // failure reported by fs.readFile during the last check(), if any
    this.error = null;
    // row-level parse errors found by the last successful check()
    this.errors = [];
  }

  /**
   * Read and parse the file, then invoke `onChecked` with no arguments.
   *
   * A read failure is not passed to the callback; it is stored in
   * `this.error` so that it is no longer lost silently.
   *
   * @param {Function} onChecked called once the check has finished
   */
  check(onChecked) {
    fs.readFile(this.filePath, 'utf-8', (err, fileContent) => {
      if (err) {
        // Keep the reason instead of swallowing it. The callback contract
        // (no arguments) and isCorrect() are deliberately left untouched.
        this.error = err;
        onChecked();
        return;
      }

      this.error = null;

      CsvParser.parse(fileContent, {
        header: true,
        delimiter: ',',
        skipEmptyLines: true,
        complete: parsedCsv => {
          this.errors = getErrors(parsedCsv);
          onChecked();
        }
      });
    });
  }

  /**
   * @returns {boolean} true when no row-level parse errors were recorded.
   *   An unreadable file does not make this false; inspect `error` for that.
   */
  isCorrect() {
    return _.isEmpty(this.errors);
  }
}

module.exports = CsvChecker;
43 changes: 24 additions & 19 deletions lib/data/ddf-index.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@ const fs = require('fs');
const _ = require('lodash');
const constants = require('../ddf-definitions/constants');
const FileDescriptor = require('./file-descriptor');
const CsvChecker = require('./csv-checker');

class DdfIndex {
constructor(ddfPath, settings) {
this.ddfPath = ddfPath;
this.settings = settings;
this.indexPath = path.resolve(ddfPath, 'ddf--index.csv');
this.csvChecker = new CsvChecker(this.indexPath);
this.content = [];
this.issues = [];
this.conceptContent = [];
Expand Down Expand Up @@ -46,7 +48,8 @@ class DdfIndex {
}

isConcept(contentRecord) {
return contentRecord.filter(contentDetail => contentDetail.key === 'concept').length > 0;
return contentRecord
.filter(contentDetail => contentDetail.key === 'concept').length > 0;
}

fillFileDescriptors() {
Expand Down Expand Up @@ -118,29 +121,31 @@ class DdfIndex {
}

check(cb) {
this.read(err => {
if (err || !this.exists) {
this.error = err;
cb();
return;
}
this.csvChecker.check(() => {
this.read(err => {
if (err || !this.exists) {
this.error = err;
cb();
return;
}

async.series(
this.fileDescriptors
.map(fileDescriptor => onFileChecked => fileDescriptor.check(onFileChecked)),
issues => {
this.issues = issues || [];
async.series(
this.fileDescriptors
.map(fileDescriptor => onFileChecked => fileDescriptor.check(onFileChecked)),
issues => {
this.issues = issues || [];

async.series(this.getConceptReading(), _err => {
if (_err) {
throw _err;
}
async.series(this.getConceptReading(), _err => {
if (_err) {
throw _err;
}

this.fileDescriptors.map(fileDescriptor => this.fillType(fileDescriptor));
this.fileDescriptors.map(fileDescriptor => this.fillType(fileDescriptor));

cb(this.issues);
cb(this.issues);
});
});
});
});
});
}
}
Expand Down
32 changes: 21 additions & 11 deletions lib/data/directory-descriptor.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
'use strict';
const _ = require('lodash');
const async = require('async');
const path = require('path');
const fu = require('../utils/file');
const constants = require('../ddf-definitions/constants');
Expand Down Expand Up @@ -81,6 +82,25 @@ class DirectoryDescriptor {
this.errors = [];
}

processIndex(files, settings, cb) {
this.ddfIndex = new DdfIndex(this.dir, settings);
this.ddfIndex.check(() => {
const byFilesList = files
.filter(file => file !== 'ddf--index.csv')
.map(file => this.getFileDescriptor(this.dir, file));
const byIndex = this.ddfIndex.fileDescriptors;

this.fileDescriptors = this.ddfIndex.exists ? byIndex : byFilesList;

const actions = this.fileDescriptors
.map(fileDescriptor =>
onFileChecked =>
fileDescriptor.csvChecker.check(onFileChecked));

async.parallel(actions, err => cb(err));
});
}

check(settings, cb) {
fu.readDir(this.dir, (err, files) => {
if (err) {
Expand All @@ -95,17 +115,7 @@ class DirectoryDescriptor {
return;
}

this.ddfIndex = new DdfIndex(this.dir, settings);
this.ddfIndex.check(() => {
const byFilesList = files
.filter(file => file !== 'ddf--index.csv')
.map(file => this.getFileDescriptor(this.dir, file));
const byIndex = this.ddfIndex.fileDescriptors;

this.fileDescriptors = this.ddfIndex.exists ? byIndex : byFilesList;

cb();
});
this.processIndex(files, settings, cb);
});
}

Expand Down
7 changes: 6 additions & 1 deletion lib/data/file-descriptor.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ const _ = require('lodash');
const fs = require('fs');
const fileUtils = require('../utils/file');
const registry = require('../ddf-rules/registry');
const CsvChecker = require('./csv-checker');

function getIssueCases(fileDescriptor) {
return [
Expand Down Expand Up @@ -44,6 +45,7 @@ class FileDescriptor {
this.indexData = data.indexData;
this.issues = [];
this.fullPath = data.fullPath;
this.csvChecker = new CsvChecker(this.fullPath);
}

fillHeaders(cb) {
Expand Down Expand Up @@ -76,7 +78,10 @@ class FileDescriptor {
this.issues = _.compact(results);

if (_.isEmpty(this.issues)) {
this.fillHeaders(cb);
this.csvChecker.check(() => {
this.fillHeaders(cb);
});

return;
}

Expand Down
2 changes: 2 additions & 0 deletions lib/ddf-rules/general-rules/index.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
'use strict';

const registry = require('../registry');
const unexpectedData = require('./unexpected-data');
const wrongDataPointHeader = require('./wrong-data-point-header');
const incorrectIdentifier = require('./incorrect-identifier');
const filenameDoesNotMatchHeader = require('./filename-does-not-match-header');
Expand All @@ -9,6 +10,7 @@ const nonDdfFolder = require('./non-ddf-folder');
const nonDdfDataset = require('./non-ddf-dataset');

module.exports = {
[registry.UNEXPECTED_DATA]: unexpectedData.rule,
[registry.NON_DDF_DATA_SET]: nonDdfDataset.rule,
[registry.NON_DDF_FOLDER]: nonDdfFolder.rule,
[registry.WRONG_DATA_POINT_HEADER]: wrongDataPointHeader.rule,
Expand Down
32 changes: 32 additions & 0 deletions lib/ddf-rules/general-rules/unexpected-data.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
'use strict';

const registry = require('../registry');
const Issue = require('../issue');

module.exports = {
rule: ddfDataSet => {
const result = [];

ddfDataSet.ddfRoot.directoryDescriptors.forEach(directoryDescriptor => {
if (!directoryDescriptor.ddfIndex.csvChecker.isCorrect()) {
result.push(
new Issue(registry.UNEXPECTED_DATA)
.setPath(directoryDescriptor.ddfIndex.indexPath)
.setData(directoryDescriptor.ddfIndex.csvChecker.errors)
);
}

directoryDescriptor.fileDescriptors.forEach(fileDescriptor => {
if (!fileDescriptor.csvChecker.isCorrect()) {
result.push(
new Issue(registry.UNEXPECTED_DATA)
.setPath(fileDescriptor.fullPath)
.setData(fileDescriptor.csvChecker.errors)
);
}
});
});

return result;
}
};
3 changes: 3 additions & 0 deletions lib/ddf-rules/registry.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
'use strict';

exports.UNEXPECTED_DATA = Symbol.for('UNEXPECTED_DATA');
exports.NON_DDF_DATA_SET = Symbol.for('NON_DDF_DATA_SET');
exports.NON_DDF_FOLDER = Symbol.for('NON_DDF_FOLDER');
exports.INDEX_IS_NOT_FOUND = Symbol.for('INDEX_IS_NOT_FOUND');
Expand Down Expand Up @@ -33,6 +34,7 @@ function tagsToString(tags) {
}

exports.tags = {
[exports.UNEXPECTED_DATA]: [exports.FILE_SYSTEM_TAG, exports.WAFFLE_SERVER_TAG],
[exports.NON_DDF_DATA_SET]: [exports.FILE_SYSTEM_TAG],
[exports.NON_DDF_FOLDER]: [exports.WARNING_TAG, exports.FILE_SYSTEM_TAG],
[exports.INDEX_IS_NOT_FOUND]: [exports.WARNING_TAG, exports.FILE_SYSTEM_TAG],
Expand All @@ -58,6 +60,7 @@ exports.tags = {
};

exports.descriptions = {
[exports.UNEXPECTED_DATA]: 'Unexpected data: wrong CSV',
[exports.NON_DDF_DATA_SET]: 'This data set is not DDF',
[exports.NON_DDF_FOLDER]: 'This folder is not DDF',
[exports.INDEX_IS_NOT_FOUND]: 'Index is not found',
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
"license": "GPL-3.0",
"dependencies": {
"async": "2.1.1",
"babyparse": "0.4.6",
"blessed": "0.1.81",
"ddf-time-utils": "0.1.6",
"deep-diff": "0.3.4",
Expand Down
51 changes: 51 additions & 0 deletions test/csv-checker.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
'use strict';
const _ = require('lodash');
const chai = require('chai');
const sinonChai = require('sinon-chai');
const expect = chai.expect;

const CsvChecker = require('../lib/data/csv-checker');

chai.use(sinonChai);

// Specs for CsvChecker: one well-formed fixture and one malformed fixture.
describe('csv checker', () => {
  it('should be no errors for correct csv file', done => {
    const csvChecker = new CsvChecker('./test/fixtures/good-folder/ddf--concepts.csv');

    csvChecker.check(() => {
      expect(csvChecker.isCorrect()).to.be.true;
      expect(_.isEmpty(csvChecker.errors)).to.be.true;

      done();
    });
  });

  // The title previously duplicated the case above although this case
  // asserts the opposite outcome.
  it('should be errors for incorrect csv file', done => {
    const csvChecker = new CsvChecker('./test/fixtures/csv/bad.csv');
    // every data row of the fixture has fewer fields than its header
    const expectedErrors = [{
      message: 'Too few fields: expected 3 fields but parsed 1',
      row: 1,
      type: 'FieldMismatch/TooFewFields',
      data: {aaa: '444'}
    },
    {
      message: 'Too few fields: expected 3 fields but parsed 2',
      row: 2,
      type: 'FieldMismatch/TooFewFields',
      data: {aaa: '555', bbb: '777'}
    },
    {
      message: 'Too few fields: expected 3 fields but parsed 1',
      row: 3,
      type: 'FieldMismatch/TooFewFields',
      data: {aaa: 'foo'}
    }];

    csvChecker.check(() => {
      expect(csvChecker.isCorrect()).to.be.false;
      expect(_.isEqual(csvChecker.errors, expectedErrors)).to.be.true;

      done();
    });
  });
});
Loading

0 comments on commit ae31db9

Please sign in to comment.