From 806a6a61f75737739cea252c385b14641008fca2 Mon Sep 17 00:00:00 2001 From: Dibyendu Gupta <63497225+dtgupta@users.noreply.github.com> Date: Fri, 5 Jan 2024 04:47:10 +0100 Subject: [PATCH 1/2] Add city field to research_organizations table and update corresponding tests --- src/alexandria3k/data_sources/ror.py | 1 + tests/data_sources/test_ror.py | 2 ++ 2 files changed, 3 insertions(+) diff --git a/src/alexandria3k/data_sources/ror.py b/src/alexandria3k/data_sources/ror.py index 9b3f93cd..6c1a46b7 100644 --- a/src/alexandria3k/data_sources/ror.py +++ b/src/alexandria3k/data_sources/ror.py @@ -190,6 +190,7 @@ def __init__(self, name, **kwargs): ColumnMeta("name", lambda row: row["name"]), ColumnMeta("status", lambda row: row["status"]), ColumnMeta("established", lambda row: row["established"]), + ColumnMeta("city", lambda row: row["addresses"][0]["city"]), ColumnMeta( "country_code", lambda row: row["country"]["country_code"] ), diff --git a/tests/data_sources/test_ror.py b/tests/data_sources/test_ror.py index c9440420..bd3a33e1 100644 --- a/tests/data_sources/test_ror.py +++ b/tests/data_sources/test_ror.py @@ -69,6 +69,7 @@ def test_organization(self): name, status, established, + city, country_code, grid, ) = result.fetchone() @@ -77,6 +78,7 @@ def test_organization(self): self.assertEqual(name, "Australian National University") self.assertEqual(status, "active") self.assertEqual(established, 1946) + self.assertEqual(city, "Canberra") self.assertEqual(country_code, "AU") self.assertEqual(grid, "grid.1001.0") From 9bdd79962cca30649a0309f1eadb0d17aec59c79 Mon Sep 17 00:00:00 2001 From: Dibyendu Gupta <63497225+dtgupta@users.noreply.github.com> Date: Mon, 8 Jan 2024 02:39:14 +0100 Subject: [PATCH 2/2] fold address into main table --- src/alexandria3k/data_sources/ror.py | 34 ++++++++++---------- tests/data_sources/test_ror.py | 47 +++++++++++++--------------- 2 files changed, 39 insertions(+), 42 deletions(-) diff --git a/src/alexandria3k/data_sources/ror.py 
b/src/alexandria3k/data_sources/ror.py index 41e8a492..7c7a37d0 100644 --- a/src/alexandria3k/data_sources/ror.py +++ b/src/alexandria3k/data_sources/ror.py @@ -190,10 +190,6 @@ def __init__(self, name, **kwargs): ColumnMeta("name", lambda row: row["name"]), ColumnMeta("status", lambda row: row["status"]), ColumnMeta("established", lambda row: row["established"]), - ColumnMeta("city", lambda row: row["addresses"][0]["city"]), - ColumnMeta( - "country_code", lambda row: row["country"]["country_code"] - ), # Although deprecated, we are adding it as an additional organization identifier, as # it provides useful to determine the ground truth data. Some organizations may not # have a GRID identifier, so we need to make sure it doesn't raise any errors. @@ -203,6 +199,23 @@ def __init__(self, name, **kwargs): .get("GRID", {}) .get("all"), ), + # Each research organization has only 1 address. It has been folded into this table. + # This is a simplified address schema. Add more fields when ROR settles it. + ColumnMeta( + "address_city", lambda row: row["addresses"][0]["city"] + ), + ColumnMeta( + "address_state", lambda row: row["addresses"][0]["state"] + ), + ColumnMeta( + "address_postcode", lambda row: row["addresses"][0]["postcode"] + ), + ColumnMeta( + "address_country_code", + lambda row: row["country"]["country_code"], + ), + ColumnMeta("address_lat", lambda row: row["addresses"][0]["lat"]), + ColumnMeta("address_lng", lambda row: row["addresses"][0]["lng"]), ], ), RorDetailsTableMeta( @@ -241,19 +254,6 @@ def __init__(self, name, **kwargs): ColumnMeta("ror_path", lambda row: row["id"][16:]), ], ), - RorDetailsTableMeta( - "ror_addresses", - extract_multiple=lambda row: row["addresses"], - columns=[ - # ROR will simplify the current address schema. - # Add more fields when ROR settles it. 
- ColumnMeta("lat", lambda row: row["lat"]), - ColumnMeta("lng", lambda row: row["lng"]), - ColumnMeta("city", lambda row: row["city"]), - ColumnMeta("state", lambda row: row["state"]), - ColumnMeta("postcode", lambda row: row["postcode"]), - ], - ), # OrgRef is deprecated, so we are not supporting this field RorDetailsTableMeta( "ror_funder_ids", diff --git a/tests/data_sources/test_ror.py b/tests/data_sources/test_ror.py index b53ac4d7..d00ceeb2 100644 --- a/tests/data_sources/test_ror.py +++ b/tests/data_sources/test_ror.py @@ -70,18 +70,26 @@ def test_organization(self): name, status, established, + grid, city, + state, + postcode, country_code, - grid, + latitude, + longitude, ) = result.fetchone() self.assertEqual(id, 0) self.assertEqual(ror_path, "019wvm592") self.assertEqual(name, "Australian National University") self.assertEqual(status, "active") self.assertEqual(established, 1946) + self.assertEqual(grid, "grid.1001.0") self.assertEqual(city, "Canberra") + self.assertEqual(state, "Australian Capital Territory") + self.assertEqual(postcode, None) self.assertEqual(country_code, "AU") - self.assertEqual(grid, "grid.1001.0") + self.assertEqual(latitude, -35.2778) + self.assertEqual(longitude, 149.1205) def test_blank_external_ids(self): result = TestRorPopulate.cursor.execute( @@ -94,16 +102,26 @@ def test_blank_external_ids(self): name, status, established, - country_code, grid, + city, + state, + postcode, + country_code, + latitude, + longitude, ) = result.fetchone() self.assertEqual(id, 28) self.assertEqual(ror_path, "02f4ks689") self.assertEqual(name, "Axiom Data Science") self.assertEqual(status, "active") self.assertEqual(established, 2007) - self.assertEqual(country_code, "US") self.assertEqual(grid, None) + self.assertEqual(city, "Anchorage") + self.assertEqual(state, None) + self.assertEqual(postcode, None) + self.assertEqual(country_code, "US") + self.assertEqual(latitude, 61.21806) + self.assertEqual(longitude, -149.90028) def 
test_funder_ids(self): result = TestRorPopulate.cursor.execute( @@ -181,27 +199,6 @@ def test_ror_relationships(self): in rows ) - def test_ror_addresses(self): - result = TestRorPopulate.cursor.execute( - """SELECT * FROM ror_addresses WHERE ror_id=( - SELECT id FROM research_organizations WHERE - ror_path='02bfwt286')""" - ) - rows = list(result) - self.assertEqual(len(rows), 1) - self.assertTrue( - ( - 16384, - 1, - -37.9083, - 145.138, - "Melbourne", - "Victoria", - None, - ) - in rows - ) - def test_ror_wikidata_ids(self): result = TestRorPopulate.cursor.execute( """SELECT wikidata_id FROM ror_wikidata_ids WHERE ror_id=(