From 68edeff22e09a796cff45e5e841d2f1405ce73d4 Mon Sep 17 00:00:00 2001 From: "Jeffrey T. Palmer" Date: Fri, 25 Jan 2019 08:40:40 -0500 Subject: [PATCH] Add storage shredder/ingestor support --- bin/xdmod-ingestor | 8 +++ bin/xdmod-shredder | 19 +++-- classes/ETL/Ingestor/HpcdbHostsIngestor.php | 2 +- .../OpenXdmod/DataWarehouseInitializer.php | 47 ++++++++++++- classes/OpenXdmod/Shredder/Storage.php | 69 +++++++++++++++++++ configuration/etl/etl.d/hpcdb-xdw.json | 33 ++++----- configuration/etl/etl.d/ingest_resources.json | 2 +- docs/storage.md | 30 +------- 8 files changed, 154 insertions(+), 56 deletions(-) create mode 100644 classes/OpenXdmod/Shredder/Storage.php diff --git a/bin/xdmod-ingestor b/bin/xdmod-ingestor index a3900e93a8..93115f9efd 100755 --- a/bin/xdmod-ingestor +++ b/bin/xdmod-ingestor @@ -269,6 +269,10 @@ function main() if($datatypeValue == 'genericcloud'){ $dwi->ingestCloudDataGeneric(); } + + if ($datatypeValue == 'storage') { + $dwi->ingestStorageData(); + } } } catch (Exception $e) { $logger->crit(array( @@ -291,6 +295,10 @@ function main() if($realmToAggregate == 'cloud' || $realmToAggregate === false){ $dwi->aggregateCloudData(); } + + if ($realmToAggregate == 'storage' || $realmToAggregate === false) { + $dwi->aggregateStorageData(); + } } catch (Exception $e) { $logger->crit(array( 'message' => 'Aggregation failed: ' . $e->getMessage(), diff --git a/bin/xdmod-shredder b/bin/xdmod-shredder index 356668b001..a988555c1d 100755 --- a/bin/xdmod-shredder +++ b/bin/xdmod-shredder @@ -234,15 +234,15 @@ function main() } if (!$dryRun) { - $logger->notice('Normalizing data!'); - try { $ingestor = $shredder->getJobIngestor(); - // The cloud shredders do not have jobs to ingest and return false when - // getJobInestor is called for them so we don't have to hard code skippping - // those formats here. 
- if($ingestor !== false){ + // The cloud and storage shredders do not have jobs to ingest and + // return false when getJobIngestor is called for them so we don't + // have to hard code skipping those formats here. + if ($ingestor !== false) { + $logger->notice('Normalizing data'); $ingestor->ingest(); + $logger->notice('Done normalizing data'); } } catch (Exception $e) { $logger->crit(array( @@ -251,8 +251,6 @@ function main() )); exit(1); } - - $logger->notice('Done normalizing data'); } // NOTE: "process_end_time" is needed for the log summary. @@ -291,10 +289,11 @@ Usage: xdmod-shredder [-v] -r resource -f format [-i file|-d dir] -f, --format *format* Specify the log file format ("pbs" for PBS/TORQUE, "sge" for Sun Grid Engine, "uge" for Univa Grid Engine 8.2+, "slurm" for - Slurm or "lsf" for LSF). + Slurm, "lsf" for LSF, or "storage" for storage data). -i, --input *file* - Specify a single log file to shred. + Specify a single log file to shred. Not applicable to cloud and + storage data. -d, --dir *directory* Specify a directory containing log files to shred. Log files in diff --git a/classes/ETL/Ingestor/HpcdbHostsIngestor.php b/classes/ETL/Ingestor/HpcdbHostsIngestor.php index b601ccbfb4..6e8f37f06f 100644 --- a/classes/ETL/Ingestor/HpcdbHostsIngestor.php +++ b/classes/ETL/Ingestor/HpcdbHostsIngestor.php @@ -26,7 +26,7 @@ public function transform(array $srcRecord, $orderId) /** * call HostListParser to expand host names and updates * this record to be able to be turned into something that - * can then be used in hpcdb-xdw-ingest.job-hosts action to + * can then be used in hpcdb-xdw-ingest-jobs.job-hosts action to * the job hosts table. 
* @see Xdmod\HostListParser */ diff --git a/classes/OpenXdmod/DataWarehouseInitializer.php b/classes/OpenXdmod/DataWarehouseInitializer.php index 27d7f1d98f..2ee824b220 100644 --- a/classes/OpenXdmod/DataWarehouseInitializer.php +++ b/classes/OpenXdmod/DataWarehouseInitializer.php @@ -128,6 +128,7 @@ public function ingestAll($startDate = null, $endDate = null) $this->ingestAllHpcdb($startDate, $endDate); $this->ingestCloudDataGeneric(); $this->ingestCloudDataOpenStack(); + $this->ingestStorageData(); } /** @@ -189,7 +190,7 @@ public function ingestAllHpcdb($startDate = null, $endDate = null) = $this->hpcdbDb->query('SELECT NOW() AS now FROM dual')[0]['now']; Utilities::runEtlPipeline( - array('hpcdb-xdw-ingest'), + array('hpcdb-xdw-ingest-common', 'hpcdb-xdw-ingest-jobs'), $this->logger, array('last-modified-start-date' => $lastModifiedStartDate) ); @@ -242,6 +243,31 @@ public function ingestCloudDataGeneric() } } + /** + * Ingest storage data. + * + * If the storage realm is not enabled then do nothing. + */ + public function ingestStorageData() + { + if (!$this->isRealmEnabled('Storage')) { + $this->logger->notice('Storage realm not enabled, not ingesting'); + return; + } + + $this->logger->notice('Ingesting storage data'); + Utilities::runEtlPipeline( + [ + 'staging-ingest-common', + 'hpcdb-ingest-common', + 'hpcdb-ingest-storage', + 'hpcdb-xdw-ingest-common', + 'xdw-ingest-storage', + ], + $this->logger + ); + } + /** * Aggregating all cloud data. If the appropriate tables do not exist then * catch the resulting exception and display a message saying that there @@ -259,6 +285,25 @@ public function aggregateCloudData() } } + /** + * Aggregate storage data. + * + * If the storage realm is not enabled then do nothing. 
+ */ + public function aggregateStorageData() + { + if (!$this->isRealmEnabled('Storage')) { + $this->logger->notice('Storage realm not enabled, not aggregating'); + return; + } + + $this->logger->notice('Aggregating storage data'); + Utilities::runEtlPipeline(['xdw-aggregate-storage'], $this->logger); + $filterListBuilder = new FilterListBuilder(); + $filterListBuilder->setLogger($this->logger); + $filterListBuilder->buildRealmLists('Storage'); + } + /** * Initialize aggregate database. * diff --git a/classes/OpenXdmod/Shredder/Storage.php b/classes/OpenXdmod/Shredder/Storage.php new file mode 100644 index 0000000000..7d8dc3ffcb --- /dev/null +++ b/classes/OpenXdmod/Shredder/Storage.php @@ -0,0 +1,69 @@ + + */ + +namespace OpenXdmod\Shredder; + +use CCR\DB\iDatabase; +use ETL\Utilities; +use Exception; +use Log; +use OpenXdmod\Shredder; + +/** + * Storage shredder. + */ +class Storage extends Shredder +{ + /** + * Override constructor to prevent job related database query. + */ + protected function __construct(iDatabase $db) + { + $this->db = $db; + $this->logger = Log::singleton('null'); + $this->format = 'storage'; + } + + /** + * The storage shredder does not support shredding a single file so throw + * an exception. + */ + public function shredFile($file) + { + throw new Exception(<<logger->notice("Shredding directory '$dir'"); + + if (!is_dir($dir)) { + $this->logger->err("'$dir' is not a directory"); + return false; + } + + Utilities::runEtlPipeline( + ['staging-ingest-storage'], + $this->logger, + ['variable-overrides' => ['STORAGE_LOG_DIRECTORY' => $dir]] + ); + } + + /** + * Returns false to indicate this shredder does not support ingestion of + * jobs. 
+ */ + public function getJobIngestor($ingestAll = false) + { + return false; + } +} diff --git a/configuration/etl/etl.d/hpcdb-xdw.json b/configuration/etl/etl.d/hpcdb-xdw.json index d79fde5331..4389e3be28 100644 --- a/configuration/etl/etl.d/hpcdb-xdw.json +++ b/configuration/etl/etl.d/hpcdb-xdw.json @@ -1,6 +1,10 @@ { "defaults": { "global": { + "class": "DatabaseIngestor", + "namespace": "ETL\\Ingestor", + "options_class": "IngestorOptions", + "enabled": true, "endpoints": { "source": { "type": "mysql", @@ -17,17 +21,11 @@ "create_schema_if_not_exists": true } } - }, - "hpcdb-xdw-ingest": { - "class": "DatabaseIngestor", - "namespace": "ETL\\Ingestor", - "options_class": "IngestorOptions", - "enabled": true } }, - "#": "New job ingestion from HPCDB to XDMoD DataWarehouse (xdw)", - "hpcdb-xdw-ingest": [{ + "#": "Ingestion of common (multiple realm) data from HPCDB to XDMoD Data Warehouse (xdw)", + "hpcdb-xdw-ingest-common": [{ "name": "account", "definition_file": "jobs/xdw/account.json", "description": "Account records" @@ -63,10 +61,6 @@ "description": "organization records", "#": "disable to allow order_id to be set", "optimize_query": false - }, { - "name": "node-count", - "definition_file": "jobs/xdw/node-count.json", - "description": "node count records" }, { "name": "pi-person", "definition_file": "jobs/xdw/piperson.json", @@ -87,10 +81,6 @@ "name": "principal-investigator", "definition_file": "jobs/xdw/principal-investigator.json", "description": "principal investigator records" - }, { - "name": "queue", - "definition_file": "jobs/xdw/queue.json", - "description": "queue records" }, { "name": "request", "definition_file": "jobs/xdw/request.json", @@ -115,6 +105,17 @@ "name": "system-account", "definition_file": "jobs/xdw/system-account.json", "description": "system account records" + }], + + "#": "New job ingestion from HPCDB to XDMoD DataWarehouse (xdw)", + "hpcdb-xdw-ingest-jobs": [{ + "name": "node-count", + "definition_file": 
"jobs/xdw/node-count.json", + "description": "node count records" + }, { + "name": "queue", + "definition_file": "jobs/xdw/queue.json", + "description": "queue records" }, { "name": "hosts", "class": "HpcdbHostsIngestor", diff --git a/configuration/etl/etl.d/ingest_resources.json b/configuration/etl/etl.d/ingest_resources.json index 710a52ccc7..59f824ed09 100644 --- a/configuration/etl/etl.d/ingest_resources.json +++ b/configuration/etl/etl.d/ingest_resources.json @@ -89,7 +89,7 @@ "class": "DatabaseIngestor", "name": "IngestResourcefact", "definition_file": "jobs/xdw/resource-fact.json", - "description": "Ingest resource information into resourcefact table. Modeled after hpcdb-xdw-ingest.resource", + "description": "Ingest resource information into resourcefact table. Modeled after hpcdb-xdw-ingest-common.resource", "endpoints": { "source": { "type": "mysql", diff --git a/docs/storage.md b/docs/storage.md index c6e175dfeb..7cc84ed1b1 100644 --- a/docs/storage.md +++ b/docs/storage.md @@ -196,9 +196,7 @@ fully ingest storage data into the data warehouse. Ingest all files in the `/path/to/storage/logs` directory: ``` -$ /usr/share/xdmod/tools/etl/etl_overseer.php \ - -d STORAGE_LOG_DIRECTORY=/path/to/storage/logs \ - -p xdmod.staging-ingest-storage +$ xdmod-shredder -f storage -r resource-name -d /path/to/storage/logs ``` **NOTE**: The above command will ingest all files in the `/path/to/storage/logs` @@ -207,28 +205,6 @@ directory even if they have already been ingested. 
Ingest and aggregate data: ``` -$ /usr/share/xdmod/tools/etl/etl_overseer.php \ - -p xdmod.staging-ingest-common \ - -p xdmod.hpcdb-ingest-common \ - -p xdmod.hpcdb-ingest-storage -$ /usr/share/xdmod/tools/etl/etl_overseer.php \ - -a xdmod.hpcdb-xdw-ingest.resource \ - -a xdmod.hpcdb-xdw-ingest.field-of-science \ - -a xdmod.hpcdb-xdw-ingest.field-of-science-hierarchy \ - -a xdmod.hpcdb-xdw-ingest.organization \ - -a xdmod.hpcdb-xdw-ingest.pi-person \ - -a xdmod.hpcdb-xdw-ingest.person \ - -a xdmod.hpcdb-xdw-ingest.people-under-pi \ - -a xdmod.hpcdb-xdw-ingest.principal-investigator \ - -a xdmod.hpcdb-xdw-ingest.resource-type \ - -a xdmod.hpcdb-xdw-ingest.system-account -$ /usr/share/xdmod/tools/etl/etl_overseer.php \ - -p xdmod.xdw-ingest-storage \ - -p xdmod.xdw-aggregate-storage -``` - -Rebuild filter lists after aggregation: - -``` -$ /usr/bin/xdmod-build-filter-lists -r Storage +$ xdmod-ingestor --ingest --datatype storage +$ xdmod-ingestor --aggregate=storage ```