From 29fa5ffc7563f746644c36979147342e8530e2b2 Mon Sep 17 00:00:00 2001 From: Xavier Roussel Date: Thu, 21 Jun 2018 17:48:25 +0200 Subject: [PATCH] Delete DataMerger and introduce Output objects + models --- src/ApiDataChecker.php | 5 + src/ApiDataFetcher.php | 5 +- src/ApiDataMerger.php | 31 ---- src/Identifiers/Doi.php | 2 +- src/Models/Affiliations.php | 30 ++++ src/Models/Authors.php | 48 ++++++ src/Models/Identifiers.php | 30 ++++ src/Models/Journal.php | 30 ++++ src/Models/Model.php | 90 ++++++++++ src/Models/Output.php | 51 ++++++ src/Models/Publication.php | 30 ++++ src/Models/Tags.php | 30 ++++ src/Models/Types.php | 30 ++++ src/Models/Updates.php | 26 +++ src/Resources/Extractors/Extractor.php | 40 +++-- src/Support/helpers.php | 4 +- tests/Integration/ApiDataFetcherTest.php | 195 +--------------------- tests/Integration/ApiDataMergerTest.php | 27 --- tests/Integration/OutputTest.php | 204 +++++++++++++++++++++++ 19 files changed, 647 insertions(+), 261 deletions(-) delete mode 100644 src/ApiDataMerger.php create mode 100644 src/Models/Affiliations.php create mode 100644 src/Models/Authors.php create mode 100644 src/Models/Identifiers.php create mode 100644 src/Models/Journal.php create mode 100644 src/Models/Model.php create mode 100644 src/Models/Output.php create mode 100644 src/Models/Publication.php create mode 100644 src/Models/Tags.php create mode 100644 src/Models/Types.php create mode 100644 src/Models/Updates.php delete mode 100644 tests/Integration/ApiDataMergerTest.php create mode 100644 tests/Integration/OutputTest.php diff --git a/src/ApiDataChecker.php b/src/ApiDataChecker.php index ac693d5..07ac55a 100644 --- a/src/ApiDataChecker.php +++ b/src/ApiDataChecker.php @@ -118,4 +118,9 @@ public static function check($data) return $result; } + + public static function getDataTypes() + { + return array_slice(array_keys(ApiDataChecker::SCHEMA['root']), 1); + } } diff --git a/src/ApiDataFetcher.php b/src/ApiDataFetcher.php index 24e8f20..32dc311 100644 --- a/src/ApiDataFetcher.php +++ b/src/ApiDataFetcher.php @@ -8,6 +8,7 @@ use Psr\Http\Message\ResponseInterface; use GuzzleHttp\Exception\RequestException; use GrahamCampbell\GuzzleFactory\GuzzleFactory; +use PubPeerFoundation\PublicationDataExtractor\Models\Output; use PubPeerFoundation\PublicationDataExtractor\Resources\Resource; use PubPeerFoundation\PublicationDataExtractor\Identifiers\Identifier; @@ -106,9 +107,7 @@ public function getData(): array $this->fetchComplementaryData(); - return array_values( - array_filter($this->apiData) - ); + return Output::getInstance()->format(); } /** diff --git a/src/ApiDataMerger.php b/src/ApiDataMerger.php deleted file mode 100644 index fc8d2e3..0000000 --- a/src/ApiDataMerger.php +++ /dev/null @@ -1,31 +0,0 @@ - $value) { - $tmp[$key][] = $value; - } - } - - return $tmp; - } -} diff --git a/src/Identifiers/Doi.php b/src/Identifiers/Doi.php index 3fee042..09de173 100644 --- a/src/Identifiers/Doi.php +++ b/src/Identifiers/Doi.php @@ -12,7 +12,7 @@ class Doi extends Identifier protected $resources = [ \PubPeerFoundation\PublicationDataExtractor\Resources\Doi::class, \PubPeerFoundation\PublicationDataExtractor\Resources\Crossref::class, - \PubPeerFoundation\PublicationDataExtractor\Resources\PubmedWebsite::class, +// \PubPeerFoundation\PublicationDataExtractor\Resources\PubmedWebsite::class, \PubPeerFoundation\PublicationDataExtractor\Resources\IdConverter::class, \PubPeerFoundation\PublicationDataExtractor\Resources\EutilsEsearch::class, ]; diff --git a/src/Models/Affiliations.php b/src/Models/Affiliations.php new file mode 100644 index 0000000..b572450 --- /dev/null +++ b/src/Models/Affiliations.php @@ -0,0 +1,30 @@ +knownIdentifierValues('name'))) { + $this->list[] = $affiliation; + } + } + + return $this->list; + } +} diff --git a/src/Models/Authors.php b/src/Models/Authors.php new file mode 100644 index 0000000..5b4b211 --- /dev/null +++ b/src/Models/Authors.php @@ -0,0 +1,48 @@ +list))) { + if ($count > $listCount) { + return $this->list = $authors; + } + + return $this->list; + } + + for ($i = 0; $i < $count; ++$i) { + $this->addUnknownAttributes($authors, $i); + } + + return $this->list; + } + + protected function addUnknownAttributes($authors, $i) + { + foreach ($authors[$i] as $key => $value) { + if (empty($value)) { + continue; + } + if (isset($this->list[$i][$key]) && ! empty($this->list[$i][$key])) { + $this->list[$i][$key] = $value; + } + } + } +} diff --git a/src/Models/Identifiers.php b/src/Models/Identifiers.php new file mode 100644 index 0000000..cd7d95e --- /dev/null +++ b/src/Models/Identifiers.php @@ -0,0 +1,30 @@ +knownIdentifierValues('value'))) { + $this->list[] = $identifier; + } + } + + return $this->list; + } +} diff --git a/src/Models/Journal.php b/src/Models/Journal.php new file mode 100644 index 0000000..e6c5e8f --- /dev/null +++ b/src/Models/Journal.php @@ -0,0 +1,30 @@ + $value) { + if ($this->shouldKeepAttribute($key, $value)) { + $this->list[$key] = $value; + } + } + + return $this->list; + } +} diff --git a/src/Models/Model.php b/src/Models/Model.php new file mode 100644 index 0000000..efeb9f0 --- /dev/null +++ b/src/Models/Model.php @@ -0,0 +1,90 @@ +list, $key)); + } + + /** + * Should the attribute be kept? + * + * @param $key + * @param $value + * @return bool + */ + protected function shouldKeepAttribute($key, $value): bool + { + $function = is_array($value) ? 'count' : 'strlen'; + + if (! isset($this->list[$key])) { + return true; + } + + return $function($this->list[$key]) > $function($value); + } + + protected function reset() + { + $this->list = []; + } +} diff --git a/src/Models/Output.php b/src/Models/Output.php new file mode 100644 index 0000000..2d1a30a --- /dev/null +++ b/src/Models/Output.php @@ -0,0 +1,51 @@ +content[$name] = $className::getInstance()->add($resourceData[0]); + } + } + + /** + * Output the formatted content array. + * + * @return array + */ + public function format() + { + $this->resetLists(); + + return $this->content; + } + + protected function resetLists() + { + foreach (ApiDataChecker::getDataTypes() as $type) { + $className = __NAMESPACE__.'\\'.ucfirst($type); + $className::getInstance()->reset(); + } + } +} diff --git a/src/Models/Publication.php b/src/Models/Publication.php new file mode 100644 index 0000000..4737c0a --- /dev/null +++ b/src/Models/Publication.php @@ -0,0 +1,30 @@ + $value) { + if ($this->shouldKeepAttribute($key, $value)) { + $this->list[$key] = $value; + } + } + + return $this->list; + } +} diff --git a/src/Models/Tags.php b/src/Models/Tags.php new file mode 100644 index 0000000..9ad2d5e --- /dev/null +++ b/src/Models/Tags.php @@ -0,0 +1,30 @@ +knownIdentifierValues('name'))) { + $this->list[] = $tag; + } + } + + return $this->list; + } +} diff --git a/src/Models/Types.php b/src/Models/Types.php new file mode 100644 index 0000000..2d9d02a --- /dev/null +++ b/src/Models/Types.php @@ -0,0 +1,30 @@ +knownIdentifierValues('name'))) { + $this->list[] = $type; + } + } + + return $this->list; + } +} diff --git a/src/Models/Updates.php b/src/Models/Updates.php new file mode 100644 index 0000000..58a7dd3 --- /dev/null +++ b/src/Models/Updates.php @@ -0,0 +1,26 @@ +list = $updates; + + return $this->list; + } +} diff --git a/src/Resources/Extractors/Extractor.php b/src/Resources/Extractors/Extractor.php index d0c623b..3214b08 100644 --- a/src/Resources/Extractors/Extractor.php +++ b/src/Resources/Extractors/Extractor.php @@ -2,6 +2,7 @@ namespace PubPeerFoundation\PublicationDataExtractor\Resources\Extractors; +use PubPeerFoundation\PublicationDataExtractor\Models\Output; use PubPeerFoundation\PublicationDataExtractor\ApiDataChecker; use PubPeerFoundation\PublicationDataExtractor\Exceptions\JournalTitleNotFoundException; @@ -41,7 +42,7 @@ public function extract(): array { $this->getDataFromDocument(); - foreach ($this->dataTypes() as $type) { + foreach (ApiDataChecker::getDataTypes() as $type) { $class = __NAMESPACE__.'\\Provides'.ucfirst($type).'Data'; if ($this instanceof $class) { @@ -53,22 +54,39 @@ public function extract(): array } } } +// if (isset($this->output['identifiers'])) { +// Output::getInstance()->addIdentifiers($this->output['identifiers']); +// } +// if (isset($this->output['publication'])) { +// Output::getInstance()->addPublication($this->output['publication']); +// } +// if (isset($this->output['journal'])) { +// Output::getInstance()->addJournal($this->output['journal']); +// } +// if (isset($this->output['affiliations'])) { +// Output::getInstance()->addAffiliations($this->output['affiliations']); +// } +// if (isset($this->output['types'])) { +// Output::getInstance()->addTypes($this->output['types']); +// } +// if (isset($this->output['tags'])) { +// Output::getInstance()->addTags($this->output['tags']); +// } +// if (isset($this->output['authors'])) { +// Output::getInstance()->addAuthors($this->output['authors']); +// } + foreach (ApiDataChecker::getDataTypes() as $type) { + if (isset($this->output[$type])) { + $method = 'add'.ucfirst($type); + Output::getInstance()->$method($this->output[$type]); + } + } $this->addOutputSource(); return $this->output; } - /** - * Get a list of data Types from Schema. - * - * @return array - */ - protected function dataTypes(): array - { - return array_slice(array_keys(ApiDataChecker::SCHEMA['root']), 1); - } - /** * Prepare each data document. * diff --git a/src/Support/helpers.php b/src/Support/helpers.php index b2d7110..c132311 100644 --- a/src/Support/helpers.php +++ b/src/Support/helpers.php @@ -102,7 +102,9 @@ function stringify($data) } if (is_array($data)) { - return stringify($data[0]); + $data = array_filter($data); + + return stringify(array_shift($data)); } return (string) trim($data); diff --git a/tests/Integration/ApiDataFetcherTest.php b/tests/Integration/ApiDataFetcherTest.php index dd852a7..5559642 100644 --- a/tests/Integration/ApiDataFetcherTest.php +++ b/tests/Integration/ApiDataFetcherTest.php @@ -2,7 +2,6 @@ namespace PubPeerFoundation\PublicationDataExtractor\Test\Integration; -use PubPeerFoundation\PublicationDataExtractor\ApiDataMerger; use PubPeerFoundation\PublicationDataExtractor\Test\TestCase; use PubPeerFoundation\PublicationDataExtractor\ApiDataFetcher; use PubPeerFoundation\PublicationDataExtractor\IdentifierResolver; @@ -23,7 +22,7 @@ public function it_fetches_data_from_a_doi_identifier() $dataFetcher->fetch(); // Assert - $this->assertCount(6, $dataFetcher->getData()); + $this->assertArrayIsValid($dataFetcher->getData()); } /** @@ -40,8 +39,7 @@ public function it_fetches_data_from_a_pubmed_doi_identifier() $dataFetcher->fetch(); // Assert - $this->assertCount(5, $extracted = $dataFetcher->getData()); - $this->assertArrayIsValid($extracted); + $this->assertArrayIsValid($dataFetcher->getData()); } /** @@ -58,7 +56,7 @@ public function it_fetches_data_from_a_arxiv_identifier() $dataFetcher->fetch(); // Assert - $this->assertCount(1, $dataFetcher->getData()); + $this->assertArrayIsValid($dataFetcher->getData()); } /** @@ -75,7 +73,7 @@ public function it_fetches_data_from_a_pubmed_identifier() $dataFetcher->fetch(); // Assert - $this->assertCount(1, $dataFetcher->getData()); + $this->assertArrayIsValid($dataFetcher->getData()); } /** @@ -92,7 +90,7 @@ public function it_fetches_data_from_a_doi_book_identifier() $dataFetcher->fetch(); // Assert - $this->assertCount(2, $dataFetcher->getData()); + $this->assertArrayIsValid($dataFetcher->getData()); } /** @@ -125,187 +123,10 @@ public function it_can_extract_pubmed_ids_from_id_converter() $dataFetcher->fetch(); // Assert - $this->assertCount(5, $extracted = $dataFetcher->getData()); + $extracted = $dataFetcher->getData(); - $merged = ApiDataMerger::handle($extracted); - $identifiers = array_merge(...$merged['identifiers']); - $this->assertTrue(count(array_filter($identifiers, function ($identifier) { + $this->assertCount(1, array_filter($extracted['identifiers'], function ($identifier) { return 'pubmed' === $identifier['type']; - })) > 0); - } - - /** - * @test - * @group forever - */ - public function test_first_150_entries_of_pubpeer_identifiers() - { - // Took 3.65 minutes to complete, average .6 seconds per fetch. - - $ids = [ - '10.1016/j.molcel.2017.01.013', - '28157506', - '10.1093/humrep/dex008', - '28166330', - '10.1111/apt.14044', - '28318052', - '10.1073/pnas.1523936113', - '10.1016/j.neuropharm.2017.04.043', - '10.1016/j.biomaterials.2015.07.055', - '26318819', - '10.1038/546033a', - '10.1038/leu.2011.62', - '21494253', - '10.1016/j.canlet.2013.12.005', - '10.1080/10408390802064347', - '10.1016/j.jss.2013.12.028', - '10.1093/embo-reports/kvf213', - '12393750', - '10.1371/journal.pone.0093386', - '10.1016/j.cell.2015.01.009', - '11624264', - '10.1152/ajpheart.00005.2017', - '28130332', - '10.1177/1078390316668478', - '10.3390/vision1010009', - '10.1016/j.cell.2009.11.030', - '19962179', - '10.1074/jbc.M115.693200', - '26634277', - '10.1530/REP-17-0063', - '28420801', - '10.1002/jum.14272', - '28586113', - '10.1007/s00424-017-2007-x', - '28597189', - '10.1101/gr.153551.112', - '23934932', - '10.1080/0142159X.2017.1324137', - '10.1016/j.meegid.2017.03.023', - '28342885', - '10.1002/hep.26505', - '10.1152/ajpheart.00281.2016', - '27986658', - '10.1016/j.gdata.2015.07.008', - '26697316', - '10.1007/s00299-016-1960-8', - '10.1128/MCB.25.8.3338-3347.2005', - '15798217', - '7958844', - '10.1074/jbc.M400881200', - '14970224', - '10.1002/hep.28927', - '28114741', - '10.1093/nar/gkw557', - '27325741', - '10.1038/ncomms7087', - '25608663', - '10.1104/pp.109.141911', - '10.1158/0008-5472.CAN-09-3114', - '20197467', - '10.12688/f1000research.11119.1', - '28529709', - '16796559', - '10.1167/17.5.16', - '10.1371/journal.pgen.1006810', - '8386381', - '10.1136/bmj.b4144', - '10.1083/jcb.201504117', - '10.1021/acs.nanolett.7b01464', - '10.1523/JNEUROSCI.0175-11.2012', - '10.1111/anae.13938', - '10.1016/j.ccell.2017.02.017', - '10.1073/pnas.1221733110', - '23812746', - '10.3389/fphar.2017.00275', - '28559847', - '10.1080/10408398.2016.1246414', - '27736161', - '10.1167/17.6.1', - '10.1136/bmj.b4330', - '10.1016/j.devcel.2015.01.012', - '10.1016/j.meegid.2017.03.023', - '10.1093/biosci/biw010', - '10.1016/j.cell.2015.07.047', - '26317470', - '10.1002/hep.28927', - '27831662', - '10.1167/17.6.2', - '10.1109/TPWRS.2015.2499700', - '10.1093/nar/gkw350', - '27131367', - '10.1126/science.aai7984', - '28360327', - '10.1093/nar/gkv644', - '26130711', - '10.1038/srep35422', - '10.1016/j.heares.2016.12.006', - '28011083', - '10.1517/13543784.17.11.1769', - '18922112', - '10.1016/j.neuron.2017.05.020', - '10.1128/MCB.00199-17', - '10.1016/j.msec.2017.03.207', - '10.1105/tpc.111.094904', - '22427335', - '10.1074/jbc.M610819200', - '17389604', - '10.1016/j.bbi.2013.05.005', - '10.1007/s11948-017-9915-1', - '10.1038/sj.onc.1204553', - '11439350', - '10.1056/NEJMsb1616595', - '28402238', - '10.1101/142554', - '10.1056/NEJMra1612008', - '28514605', - '14975242', - '10.1016/j.clinthera.2017.02.007', - '28291580', - '10.4049/jimmunol.0903617', - '12509522', - '10.1179/016164107X204693', - '17626733', - '10.1074/jbc.M117.794487', - '28512129', - '10.4103/0019-509X.175593', - '10.3758/s13414-016-1160-1', - '10.1096/fj.06-7947com', - '17442731', - '10.1038/ncomms15160', - '28513586', - '10.1016/j.amepre.2013.08.019', - '24355667', - '6191871', - '10.1111/mpp.12417', - '10.1126/scisignal.2002790', - '22692423', - '10.1016/j.bbapap.2011.11.003', - '22155276', - '10.1038/nmeth.4293', - '10.1038/nature22314', - '10.1523/JNEUROSCI.2086-05.2005', - '16107656', - '10.4049/jimmunol.1201651', - '23028059', - '10.1016/j.jesp.2010.01.007', - '10.4161/auto.19653', - '22576015', - '10.1002/1873-3468.12657', - ]; - - // Act - foreach ($ids as $id) { - $identifier = new IdentifierResolver($id); - $dataFetcher = new ApiDataFetcher($identifier->handle()); - - $dataFetcher->fetch(); - - $extracted = $dataFetcher->getData(); - - foreach ($extracted as $resource) { - $this->assertArrayIsValid($resource); - } - } + })); } } diff --git a/tests/Integration/ApiDataMergerTest.php b/tests/Integration/ApiDataMergerTest.php deleted file mode 100644 index 7ff4436..0000000 --- a/tests/Integration/ApiDataMergerTest.php +++ /dev/null @@ -1,27 +0,0 @@ - ['title' => 'blabla']], - ['publication' => ['title' => 'blibli']], - ]; - - // Act - $mergedData = ApiDataMerger::handle($fetchedData); - - // Assert - $this->assertCount(1, $mergedData); - $this->assertArrayHasKey('publication', $mergedData); - $this->assertCount(2, $mergedData['publication']); - } -} diff --git a/tests/Integration/OutputTest.php b/tests/Integration/OutputTest.php new file mode 100644 index 0000000..2603aa6 --- /dev/null +++ b/tests/Integration/OutputTest.php @@ -0,0 +1,204 @@ +handle()); + + $dataFetcher->fetch(); + + $dataFetcher->getData(); + $formatted = Output::getInstance()->format(); + $this->assertArrayIsValid($formatted); + } + } +}