Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve verification of resource codes #720

Merged
merged 8 commits into from
Nov 10, 2018
42 changes: 42 additions & 0 deletions classes/ETL/EtlOverseer.php
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,14 @@ public function verifyActions(

if ( isset($action->getOptions()->include_only_resource_codes) || isset($action->getOptions()->exclude_resource_codes) ) {
$this->queryResourceCodeToIdMap($etlConfig);

// Verify that specified resource codes are valid

$codeList = array_merge(
$action->getOptions()->include_only_resource_codes,
$action->getOptions()->exclude_resource_codes
);
$this->verifyResourceCodes($codeList);
}

$action->initialize($this->etlOverseerOptions);
Expand Down Expand Up @@ -273,6 +281,32 @@ protected function queryResourceCodeToIdMap(EtlConfiguration $etlConfig)
$this->etlOverseerOptions->setResourceCodeToIdMap($map);
}

/**
 * Check every supplied resource code against the resource code to resource id map held by the
 * overseer options. All codes without an entry in the map are collected and reported together
 * in a single error rather than stopping at the first one.
 *
 * @param array $codeList The resource codes to look up in the map.
 *
 * @return Nothing.
 * @throws Exception (raised through logAndThrowException()) if one or more codes are not keys
 *   of the map.
 */

protected function verifyResourceCodes($codeList)
{
    $codeToIdMap = $this->etlOverseerOptions->getResourceCodeToIdMap();
    $unknownCodes = array();

    foreach ( $codeList as $resourceCode ) {
        if ( array_key_exists($resourceCode, $codeToIdMap) ) {
            continue;
        }
        $unknownCodes[] = $resourceCode;
    }

    // Nothing to report when every code was found in the map.
    if ( 0 === count($unknownCodes) ) {
        return;
    }

    $message = sprintf("Unknown resource code(s) specified: '%s'", implode("','", $unknownCodes));
    $this->logAndThrowException($message);
}

/* ------------------------------------------------------------------------------------------
* @see iEtlOverseer::execute()
* ------------------------------------------------------------------------------------------
Expand All @@ -290,6 +324,14 @@ public function execute(EtlConfiguration $etlConfig)
|| count($this->etlOverseerOptions->getExcludeResourceCodes()) > 0
) {
$this->queryResourceCodeToIdMap($etlConfig);

// Verify that specified resource codes are valid

$codeList = array_merge(
$this->etlOverseerOptions->getIncludeOnlyResourceCodes(),
$this->etlOverseerOptions->getExcludeResourceCodes()
);
$this->verifyResourceCodes($codeList);
}

// Pre-pend the default module name to any section names that are not already qualified.
Expand Down
3 changes: 1 addition & 2 deletions classes/ETL/EtlOverseerOptions.php
Original file line number Diff line number Diff line change
Expand Up @@ -789,8 +789,7 @@ public function mapResourceCodesToIds(array $codes) {

foreach ( $codes as $code ) {
if ( false === ($resourceId = $this->getResourceIdFromCode($code)) ) {
$msg = "Unknown include resource code: '$code'";
$this->logAndThrowException($msg);
$this->logAndThrowException(sprintf("Unknown resource code: '%s'", $code));
} else {
$resourceIdList[] = $resourceId;
}
Expand Down
4 changes: 3 additions & 1 deletion classes/ETL/aAction.php
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,9 @@ private function initializeVariableStore()
&& 0 != count($value)
) {
$resourceCode = current($value);
$resourceId = $this->etlOverseerOptions->getResourceIdFromCode($resourceCode);
if ( false === ($resourceId = $this->etlOverseerOptions->getResourceIdFromCode($resourceCode)) ) {
$this->logAndThrowException(sprintf("Unknown resource code: '%s'", $resourceCode));
}
if ( count($value) > 1 ) {
$this->logger->info(
sprintf(
Expand Down
180 changes: 180 additions & 0 deletions open_xdmod/modules/xdmod/component_tests/lib/ETL/EtlOverseerTest.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
<?php
/**
* Test various aspects of the EtlOverseer class. We use an EtlConfiguration with the DummyIngestor
* and DummyAggregator to allow us to test pipeline infrastructure outside of the actions themselves.
*/

namespace ComponentTests\ETL;

use Exception;
use CCR\DB;
use ETL\EtlOverseer;
use ETL\Configuration\EtlConfiguration;
use ETL\EtlOverseerOptions;

/**
 * Various tests for the EtlOverseer class.
 *
 * The EtlConfiguration and EtlOverseerOptions objects are created once and shared (statically)
 * by all tests, so the include/exclude resource code lists are cleared before every test and
 * before every sub-case to keep one case from masking a failure in another.
 */

class EtlOverseerTest extends \PHPUnit_Framework_TestCase
{
    private static $etlConfig = null;
    private static $testArtifactInputPath = null;
    private static $overseerOptions = null;

    /**
     * Set up machinery that we will need for these tests.
     *
     * @return Nothing
     */

    public static function setUpBeforeClass()
    {
        self::$testArtifactInputPath = realpath(BASE_DIR . '/tests/artifacts/xdmod-test-artifacts/xdmod/etlv2/configuration/input/');

        // Use a pipeline with DummyIngestor and/or DummyAggregator so we can test infrastructure
        $configFile = self::$testArtifactInputPath . "/xdmod_etl_config_dummy_actions.json";
        self::$etlConfig = new EtlConfiguration(
            $configFile,
            self::$testArtifactInputPath,
            null,
            array('default_module_name' => 'xdmod')
        );
        self::$etlConfig->initialize();

        // Explicitly set the resource code map so we don't need to query the database
        self::$overseerOptions = new EtlOverseerOptions(
            array(
                'default-module-name' => 'xdmod',
                'process-sections' => array('dummy-actions'),
                'resource-code-map' => array(
                    'resource1' => 1,
                    'resource2' => 2
                )
            )
        );
    }

    /**
     * Reset values in shared classes.
     */

    public function setUp()
    {
        $this->resetResourceCodes();
    }

    /**
     * Clear both the include-only and the exclude resource code lists on the shared options
     * object. Both lists must be reset: the options object is static, so anything left behind
     * by one case would otherwise still be present when the next case runs.
     */

    private function resetResourceCodes()
    {
        self::$overseerOptions->setIncludeOnlyResourceCodes(null);
        self::$overseerOptions->setExcludeResourceCodes(null);
    }

    /**
     * Create an overseer from the shared options and execute the shared configuration.
     */

    private function executeOverseer()
    {
        $overseer = new EtlOverseer(self::$overseerOptions);
        $overseer->execute(self::$etlConfig);
    }

    /**
     * Execute the overseer and fail the test if any exception is raised.
     */

    private function assertExecutionSucceeds()
    {
        try {
            $this->executeOverseer();
        } catch ( Exception $e ) {
            $this->fail($e->getMessage());
        }
    }

    /**
     * Execute the overseer and require that an exception mentioning the unknown code is raised.
     *
     * A flag is used instead of calling fail() inside the try block because the catch clause
     * catches Exception and would therefore also catch the failure raised by fail().
     *
     * @param string $unknownCode The resource code expected to appear in the exception message.
     */

    private function assertExecutionRejectsCode($unknownCode)
    {
        $exceptionThrown = true;
        try {
            $this->executeOverseer();
            $exceptionThrown = false;
        } catch ( Exception $e ) {
            $this->assertContains($unknownCode, $e->getMessage(), "Unknown resource code but did not find expected code '$unknownCode'");
        }
        $this->assertTrue($exceptionThrown, "Expected exception to be thrown for unknown resource code '$unknownCode'");
    }

    /**
     * Test various cases of valid include and exclude resource codes
     */

    public function testValidResourceCodes()
    {
        // Single valid resource code to include

        $this->resetResourceCodes();
        self::$overseerOptions->setIncludeOnlyResourceCodes('resource1');
        $this->assertExecutionSucceeds();

        // Array of valid resource codes to include

        $this->resetResourceCodes();
        self::$overseerOptions->setIncludeOnlyResourceCodes(array('resource1', 'resource2'));
        $this->assertExecutionSucceeds();

        // Single valid resource code to exclude

        $this->resetResourceCodes();
        self::$overseerOptions->setExcludeResourceCodes('resource1');
        $this->assertExecutionSucceeds();

        // Array of valid resource codes to exclude

        $this->resetResourceCodes();
        self::$overseerOptions->setExcludeResourceCodes(array('resource1', 'resource2'));
        $this->assertExecutionSucceeds();
    }

    /**
     * Test various cases of invalid include and exclude resource codes
     */

    public function testInvalidResourceCodes()
    {
        // Single invalid resource code to include

        $unknownCode = 'unknown101';
        $this->resetResourceCodes();
        self::$overseerOptions->setIncludeOnlyResourceCodes($unknownCode);
        $this->assertExecutionRejectsCode($unknownCode);

        // Array with one invalid resource code to include

        $unknownCode = 'unknown102';
        $this->resetResourceCodes();
        self::$overseerOptions->setIncludeOnlyResourceCodes(array('resource1', $unknownCode));
        $this->assertExecutionRejectsCode($unknownCode);

        // Single invalid resource code to exclude

        $unknownCode = 'unknown101';
        $this->resetResourceCodes();
        self::$overseerOptions->setExcludeResourceCodes($unknownCode);
        $this->assertExecutionRejectsCode($unknownCode);

        // Array with one invalid resource code to exclude

        $unknownCode = 'unknown102';
        $this->resetResourceCodes();
        self::$overseerOptions->setExcludeResourceCodes(array('resource1', $unknownCode));
        $this->assertExecutionRejectsCode($unknownCode);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"table_definition": [
{
"$ref": "${table_definition_dir}/dummy_table.json#/table_definition"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"table_definition": {
"name": "dummy_table",
"engine": "MyISAM",
"comment": "Use with dummy actions",
"columns": [
{
"name": "my_int",
"type": "int",
"nullable": false
},
{
"name": "my_varchar8",
"type": "varchar(8)",
"nullable": false
}
]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
{
"#": "Paths for the various configuration and data subdirectories. This is added to the global",
"#": "defaults and actions.",

"paths": {
"table_definition_dir": "etl_tables_8.0.0.d",
"action_definition_dir": "etl_action_defs_8.0.0.d",
"specs_dir": "etl_specs.d",
"macro_dir": "etl_macros.d",
"sql_dir": "etl_sql.d",
"data_dir": "etl_data.d"
},

"defaults": {

"#": "Global options are lowest priority and applied to all actions",

"global" : {
"#": "The utility endpoint is used by the etl_overseer script to query resource codes.",
"endpoints": {
"utility": {
"type": "mysql",
"name": "Utility DB",
"config": "datawarehouse",
"schema": "modw"
},
"source": {
"type": "mysql",
"name": "Utility DB",
"config": "datawarehouse",
"schema": "modw"
},
"destination": {
"type": "mysql",
"name": "Utility DB",
"config": "datawarehouse",
"schema": "modw"
}
}
},

"#": "Options specific to the 'dummy-actions' pipeline",

"dummy-actions": {
"namespace": "ETL\\Ingestor",
"options_class": "IngestorOptions"
}

},

"dummy-actions": [
{
"name": "dummy-ingestor",
"description": "Dummy ingestor for testing general ETL machinery",
"class": "DummyIngestor",
"definition_file": "dummy_action.json"
}
]

}