Skip to content

Commit

Permalink
feat(rule): ENTITY_VALUE_AS_ENTITY_NAME rule implementation
Browse files Browse the repository at this point in the history
Closes #471
  • Loading branch information
buchslava committed Apr 11, 2018
1 parent 5350398 commit 5875119
Show file tree
Hide file tree
Showing 10 changed files with 2,994 additions and 6 deletions.
8 changes: 7 additions & 1 deletion src/ddf-definitions/ddf-data-set.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { parallelLimit } from 'async';
import { compact, intersection, isEmpty } from 'lodash';
import { compact, intersection, isEmpty, includes } from 'lodash';
import { CONCEPT, ENTITY, DATA_POINT } from './constants'
import { Concept } from './concept';
import { Entity } from './entity';
Expand Down Expand Up @@ -107,6 +107,12 @@ export class DdfDataSet {
.map(record => record.concept);
}

/**
 * Returns the `concept` value of every concept record whose `concept_type`
 * is one of the given types.
 *
 * @param types concept types to look for
 * @returns the matching concept ids, in the order the records are stored
 */
getConceptsByType(...types: string[]) {
  const matchingConceptIds = [];

  for (const conceptRecord of this.getConcept().getAllData()) {
    if (includes(types, conceptRecord.concept_type)) {
      matchingConceptIds.push(conceptRecord.concept);
    }
  }

  return matchingConceptIds;
}

getDataPackageResources() {
return this.ddfRoot.getDataPackageResources();
}
Expand Down
47 changes: 47 additions & 0 deletions src/ddf-rules/entity-rules/entity-value-as-entity-name.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import * as path from 'path';
import { keys, includes, isString, head } from 'lodash';
import { ENTITY_VALUE_AS_ENTITY_NAME } from '../registry';
import { DdfDataSet } from '../../ddf-definitions/ddf-data-set';
import { Issue } from '../issue';
import {
CONCEPT_TYPE_ENTITY_DOMAIN,
CONCEPT_TYPE_ENTITY_SET,
IDataPackageResourceRecord
} from '../../utils/ddf-things';


/**
 * Looks up the primary key (the entity "gid" column) that datapackage.json
 * declares for the given entities file.
 *
 * @param ddfDataSet data set whose `ddfRoot` supplies the dataset root folder and the datapackage resources
 * @param entitiesPath path of the entities file on disk
 * @returns the string primary key of the matching datapackage resource
 * @throws Error when no resource with a string primary key matches `entitiesPath`
 */
const getGidByResource = (ddfDataSet: DdfDataSet, entitiesPath: string): string => {
  const ddfRoot = ddfDataSet.ddfRoot;
  // Data Package resource paths use "/" regardless of the operating system.
  const toPosixPath = (value: string): string => value.replace(/\\/g, '/');
  // Resource paths are relative to the dataset root, so the path has to be computed
  // FROM the root folder TO the file; this also keeps sub-folders (e.g. "sub/file.csv").
  const dataPackageCompatiblePath = toPosixPath(
    path.relative(ddfRoot.dataPackageDescriptor.rootFolder, entitiesPath)
  );
  const resources: IDataPackageResourceRecord[] = ddfRoot.getDataPackageResources()
    .filter(record =>
      isString(record.path) &&
      toPosixPath(record.path) === dataPackageCompatiblePath &&
      // A resource without a schema cannot provide a primary key.
      !!record.schema &&
      isString(record.schema.primaryKey)
    );
  const resource = head(resources);

  if (!resource) {
    // Fail with an explicit message instead of an opaque "cannot read property of undefined".
    throw new Error(`No datapackage resource with a string primary key found for "${entitiesPath}"`);
  }

  return <string>resource.schema.primaryKey;
};

/**
 * ENTITY_VALUE_AS_ENTITY_NAME rule.
 *
 * Produces one issue for every entity record whose primary-key value is equal
 * to the name of a concept of type entity domain or entity set.
 */
export const rule = {
  rule: (ddfDataSet: DdfDataSet) => {
    const reservedNames = ddfDataSet.getConceptsByType(CONCEPT_TYPE_ENTITY_DOMAIN, CONCEPT_TYPE_ENTITY_SET);
    const recordsByFile = ddfDataSet.getEntity().getDataByFiles();
    const foundIssues = [];

    for (const fileName of keys(recordsByFile)) {
      // The column holding the entity id differs per file; it is taken from datapackage.json.
      const primaryKey = getGidByResource(ddfDataSet, fileName);

      for (const entityRecord of recordsByFile[fileName]) {
        if (!includes(reservedNames, entityRecord[primaryKey])) {
          continue;
        }

        const issue = new Issue(ENTITY_VALUE_AS_ENTITY_NAME)
          .setPath(entityRecord.$$source)
          .setData({
            entityName: primaryKey,
            entityRecord
          });

        foundIssues.push(issue);
      }
    }

    return foundIssues;
  }
};
4 changes: 3 additions & 1 deletion src/ddf-rules/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import { rule as unexpectedTranslationsData } from './translation-rules/unexpect
import { rule as unexpectedDataPointTranslationsData } from './translation-rules/unexpected-data-point-translations-data';
import { rule as duplicatedDataPointTranslationKey } from './translation-rules/duplicated-data-point-translation-key';
import { rule as duplicatedTranslationKey } from './translation-rules/duplicated-translation-key';
import { rule as entityValueAsEntityName } from './entity-rules/entity-value-as-entity-name';

export const allRules = {
[registry.CONCEPT_ID_IS_NOT_UNIQUE]: conceptIdIsNotUnique,
Expand Down Expand Up @@ -84,5 +85,6 @@ export const allRules = {
[registry.INCORRECT_BOOLEAN_ENTITY]: incorrectBooleanEntity,
[registry.CONCEPT_LOOKS_LIKE_BOOLEAN]: conceptLooksLikeBoolean,
[registry.DATA_POINT_CONSTRAINT_VIOLATION]: dataPointConstraintViolation,
[registry.DUPLICATED_DATA_POINT_KEY]: duplicatedDataPointKey
[registry.DUPLICATED_DATA_POINT_KEY]: duplicatedDataPointKey,
[registry.ENTITY_VALUE_AS_ENTITY_NAME]: entityValueAsEntityName
};
11 changes: 8 additions & 3 deletions src/ddf-rules/registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,14 @@ export const DATA_POINT_CONSTRAINT_VIOLATION = Symbol.for('DATA_POINT_CONSTRAINT
// Rule identifiers. Each one is a symbol taken from the global symbol registry
// (`Symbol.for`) under the rule's own name; the same symbols are used as keys of
// the `tags`, `descriptions` and `howToFix` maps.
export const DUPLICATED_DATA_POINT_KEY = Symbol.for('DUPLICATED_DATA_POINT_KEY');
export const INCORRECT_BOOLEAN_ENTITY = Symbol.for('INCORRECT_BOOLEAN_ENTITY');
export const CONCEPT_LOOKS_LIKE_BOOLEAN = Symbol.for('CONCEPT_LOOKS_LIKE_BOOLEAN');
export const ENTITY_VALUE_AS_ENTITY_NAME = Symbol.for('ENTITY_VALUE_AS_ENTITY_NAME');

// Tags that the `tags` map attaches to rules. They are registry symbols, so
// `tagsToString` can turn them back into their string keys via `Symbol.keyFor`.
// NOTE(review): the registry keys of the last two tags include the `_TAG` suffix,
// unlike the others — confirm whether consumers of the string form rely on that.
export const WARNING_TAG = Symbol.for('WARNING');
export const FILE_SYSTEM_TAG = Symbol.for('FILE_SYSTEM');
export const DATAPOINT_TAG = Symbol.for('DATAPOINT');
export const TRANSLATION_TAG = Symbol.for('TRANSLATION');
export const DATAPACKAGE_TAG = Symbol.for('DATAPACKAGE_TAG');
export const WAFFLE_SERVER_TAG = Symbol.for('WAFFLE_SERVER_TAG');

function tagsToString(tags: any[]) {
return tags.map(tag => Symbol.keyFor(tag));
Expand Down Expand Up @@ -87,7 +89,8 @@ export const tags: any = {
[DATA_POINT_CONSTRAINT_VIOLATION]: [DATAPOINT_TAG],
[DUPLICATED_DATA_POINT_KEY]: [DATAPOINT_TAG],
[INCORRECT_BOOLEAN_ENTITY]: [],
[CONCEPT_LOOKS_LIKE_BOOLEAN]: [WARNING_TAG]
[CONCEPT_LOOKS_LIKE_BOOLEAN]: [WARNING_TAG],
[ENTITY_VALUE_AS_ENTITY_NAME]: [WAFFLE_SERVER_TAG, WARNING_TAG]
};

export const descriptions = {
Expand Down Expand Up @@ -129,7 +132,8 @@ export const descriptions = {
[DATA_POINT_CONSTRAINT_VIOLATION]: 'Constraint violation for particular datapoint.',
[DUPLICATED_DATA_POINT_KEY]: 'Duplicated key is found in datapoint file.',
[INCORRECT_BOOLEAN_ENTITY]: 'Boolean entitiy field has an incorrect value.',
[CONCEPT_LOOKS_LIKE_BOOLEAN]: 'Entity contains values that look like boolean, but related entity field has a different type.'
[CONCEPT_LOOKS_LIKE_BOOLEAN]: 'Entity contains values that look like boolean, but related entity field has a different type.',
[ENTITY_VALUE_AS_ENTITY_NAME]: 'Entity value should not be equal to entity domain name or entity set name. This rule providing is critical for DDFQL and DDF reader supporting (on WS).'
};

export const howToFix = {
Expand Down Expand Up @@ -171,7 +175,8 @@ export const howToFix = {
[DATA_POINT_CONSTRAINT_VIOLATION]: 'Some datapoints do not conform the entity constraints imposed in their filenames and/or in datapackage.json. More info on DDFcsv file naming: https://open-numbers.github.io/ddf.html',
[DUPLICATED_DATA_POINT_KEY]: 'Datapoint files should have unique keys',
[INCORRECT_BOOLEAN_ENTITY]: 'Use only TRUE or FALSE values for concepts of type "boolean"',
[CONCEPT_LOOKS_LIKE_BOOLEAN]: 'Consider changeing the concept type to "boolean"'
[CONCEPT_LOOKS_LIKE_BOOLEAN]: 'Consider changing the concept type to "boolean"',
[ENTITY_VALUE_AS_ENTITY_NAME]: 'Simplest way to fix the issue is entity value renaming, in other case domain or entity set name should be changed.'
};

export const getRulesInformation = () => Object.getOwnPropertySymbols(exports.descriptions)
Expand Down
15 changes: 15 additions & 0 deletions src/utils/ddf-things.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,18 @@ export const CONCEPT_TYPES = [
CONCEPT_TYPE_ROLE,
CONCEPT_TYPE_CUSTOM_TYPE
];

/** A single column declared in the schema of a datapackage resource. */
export interface IDataPackageField {
  name: string;
}

/** Schema of a datapackage resource: its columns and its primary key. */
export interface IDataPackageResourceSchema {
  fields: Array<IDataPackageField>;
  // Either one column name or a list of column names (composite key).
  primaryKey: string | Array<string>;
}

/** One entry of the `resources` list of datapackage.json. */
export interface IDataPackageResourceRecord {
  path: string;
  name: string;
  schema: IDataPackageResourceSchema;
}
32 changes: 31 additions & 1 deletion test/entity-rules.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ import {
NON_UNIQUE_ENTITY_VALUE,
UNEXISTING_CONSTRAINT_VALUE,
INCORRECT_BOOLEAN_ENTITY,
CONCEPT_LOOKS_LIKE_BOOLEAN
CONCEPT_LOOKS_LIKE_BOOLEAN,
ENTITY_VALUE_AS_ENTITY_NAME
} from '../src/ddf-rules/registry';
import { Issue } from '../src/ddf-rules/issue';
import { allRules } from '../src/ddf-rules';
Expand Down Expand Up @@ -432,4 +433,33 @@ describe('rules for entry', () => {
});
});
});

describe('when "ENTITY_VALUE_AS_ENTITY_NAME" rule', () => {
  // Applies the rule under test to the data set that the current test has loaded.
  const runRule = () => <Issue[]>allRules[ENTITY_VALUE_AS_ENTITY_NAME].rule(ddfDataSet);

  it('any issue should NOT be found for a folder without the problem (SG)', done => {
    ddfDataSet = new DdfDataSet('./test/fixtures/good-folder-dp', null);
    ddfDataSet.load(() => {
      const foundIssues = runRule();

      expect(foundIssues.length).to.equal(0);

      done();
    });
  });

  it('an issue should be found for a folder with the problem', done => {
    ddfDataSet = new DdfDataSet('./test/fixtures/rules-cases/entity-value-as-entity-name', null);
    ddfDataSet.load(() => {
      const foundIssues = runRule();
      const firstIssue: Issue = head(foundIssues);

      // Exactly one record of the fixture uses the domain name "geo" as its value.
      expect(foundIssues.length).to.equal(1);
      expect(endsWith(firstIssue.path, 'ddf--entities--geo.csv')).to.be.true;
      expect(firstIssue.data.entityName).to.equal('geo');

      const reportedRecord = firstIssue.data.entityRecord;

      expect(reportedRecord.geo).to.equal('geo');
      expect(reportedRecord.geo_name).to.equal('Georgia');
      expect(reportedRecord.$$source).to.equal(firstIssue.path);
      expect(reportedRecord.$$lineNumber).to.equal(4);

      done();
    });
  });
});
});
114 changes: 114 additions & 0 deletions test/fixtures/rules-cases/entity-value-as-entity-name/datapackage.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
{
"name": "good-folder-dp",
"title": "good-folder-dp",
"description": "",
"version": "0.0.1",
"language": {
"id": "en",
"name": "English"
},
"translations": [],
"license": "",
"author": "",
"resources": [
{
"path": "ddf--concepts.csv",
"name": "ddf--concepts",
"schema": {
"fields": [
{
"name": "concept"
},
{
"name": "concept_type"
},
{
"name": "name"
}
],
"primaryKey": "concept"
}
},
{
"path": "ddf--datapoints--gas_production_bcf--by--geo--year.csv",
"name": "gas_production_bcf--by--geo--year",
"schema": {
"fields": [
{
"name": "geo"
},
{
"name": "year"
},
{
"name": "gas_production_bcf"
}
],
"primaryKey": [
"geo",
"year"
]
}
},
{
"path": "ddf--entities--geo.csv",
"name": "geo",
"schema": {
"fields": [
{
"name": "geo"
},
{
"name": "geo_name"
}
],
"primaryKey": "geo"
}
}
],
"ddfSchema": {
"datapoints": [
{
"primaryKey": [
"geo",
"year"
],
"value": "gas_production_bcf",
"resources": [
"gas_production_bcf--by--geo--year"
]
}
],
"entities": [
{
"primaryKey": [
"geo"
],
"value": "geo_name",
"resources": [
"geo"
]
}
],
"concepts": [
{
"primaryKey": [
"concept"
],
"value": "concept_type",
"resources": [
"ddf--concepts"
]
},
{
"primaryKey": [
"concept"
],
"value": "name",
"resources": [
"ddf--concepts"
]
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
concept,concept_type,name
geo,entity_domain,Geo
gas_production_bcf,measure,Gas Production – Bcf
name,string,Name
geo_name,string,Name
year,time,Year
Loading

0 comments on commit 5875119

Please sign in to comment.