From 6207af748bfc7f1d8477629b81539417311bed3b Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Tue, 13 Dec 2022 11:33:14 -0500 Subject: [PATCH 1/2] spiders: Remove lapsed spiders --- docs/spiders.rst | 62 ------------------- .../spiders/ecuador_emergency.py | 30 --------- .../spiders/mexico_oaxaca_iaip.py | 24 ------- .../spiders/nicaragua_solid_waste.py | 27 -------- .../spiders/nigeria_budeshi_records.py | 18 ------ .../spiders/nigeria_budeshi_releases.py | 34 ---------- .../nigeria_kaduna_state_budeshi_records.py | 14 ----- .../nigeria_kaduna_state_budeshi_releases.py | 14 ----- kingfisher_scrapy/spiders/tanzania_zabuni.py | 37 ----------- 9 files changed, 260 deletions(-) delete mode 100644 kingfisher_scrapy/spiders/ecuador_emergency.py delete mode 100644 kingfisher_scrapy/spiders/mexico_oaxaca_iaip.py delete mode 100644 kingfisher_scrapy/spiders/nicaragua_solid_waste.py delete mode 100644 kingfisher_scrapy/spiders/nigeria_budeshi_records.py delete mode 100644 kingfisher_scrapy/spiders/nigeria_budeshi_releases.py delete mode 100644 kingfisher_scrapy/spiders/nigeria_kaduna_state_budeshi_records.py delete mode 100644 kingfisher_scrapy/spiders/nigeria_kaduna_state_budeshi_releases.py delete mode 100644 kingfisher_scrapy/spiders/tanzania_zabuni.py diff --git a/docs/spiders.rst b/docs/spiders.rst index a7c1d04a3..cf44035bd 100644 --- a/docs/spiders.rst +++ b/docs/spiders.rst @@ -366,13 +366,6 @@ Dominican Republic Ecuador ~~~~~~~ -.. autoclass:: kingfisher_scrapy.spiders.ecuador_emergency.EcuadorEmergency - :no-members: - -.. code-block:: bash - - scrapy crawl ecuador_emergency - .. autoclass:: kingfisher_scrapy.spiders.ecuador_sercop_api.EcuadorSERCOPAPI :no-members: @@ -799,13 +792,6 @@ Mexico scrapy crawl mexico_nuevo_leon_releases -.. autoclass:: kingfisher_scrapy.spiders.mexico_oaxaca_iaip.MexicoOaxacaIAIP - :no-members: - -.. code-block:: bash - - scrapy crawl mexico_oaxaca_iaip - .. autoclass:: kingfisher_scrapy.spiders.mexico_puebla_itaipue.MexicoPueblaITAIPUE :no-members: @@ -906,16 +892,6 @@ Netherlands scrapy crawl netherlands_digiwhist -Nicaragua -~~~~~~~~~ - -.. autoclass:: kingfisher_scrapy.spiders.nicaragua_solid_waste.NicaraguaSolidWaste - :no-members: - -.. code-block:: bash - - scrapy crawl nicaragua_solid_waste - Nigeria ~~~~~~~ @@ -933,20 +909,6 @@ Nigeria scrapy crawl nigeria_anambra_state -.. autoclass:: kingfisher_scrapy.spiders.nigeria_budeshi_records.NigeriaBudeshiRecords - :no-members: - -.. code-block:: bash - - scrapy crawl nigeria_budeshi_records - -.. autoclass:: kingfisher_scrapy.spiders.nigeria_budeshi_releases.NigeriaBudeshiReleases - :no-members: - -.. code-block:: bash - - scrapy crawl nigeria_budeshi_releases - .. autoclass:: kingfisher_scrapy.spiders.nigeria_cross_river_state_records.NigeriaCrossRiverStateRecords :no-members: @@ -996,20 +958,6 @@ Nigeria scrapy crawl nigeria_gombe_state -.. autoclass:: kingfisher_scrapy.spiders.nigeria_kaduna_state_budeshi_records.NigeriaKadunaStateBudeshiRecords - :no-members: - -.. code-block:: bash - - scrapy crawl nigeria_kaduna_state_budeshi_records - -.. autoclass:: kingfisher_scrapy.spiders.nigeria_kaduna_state_budeshi_releases.NigeriaKadunaStateBudeshiReleases - :no-members: - -.. code-block:: bash - - scrapy crawl nigeria_kaduna_state_budeshi_releases - .. autoclass:: kingfisher_scrapy.spiders.nigeria_kano_state.NigeriaKanoState :no-members: @@ -1262,16 +1210,6 @@ Switzerland scrapy crawl switzerland_digiwhist -Tanzania -~~~~~~~~ - -.. autoclass:: kingfisher_scrapy.spiders.tanzania_zabuni.TanzaniaZabuni - :no-members: - -.. code-block:: bash - - scrapy crawl tanzania_zabuni - Uganda ~~~~~~ diff --git a/kingfisher_scrapy/spiders/ecuador_emergency.py b/kingfisher_scrapy/spiders/ecuador_emergency.py deleted file mode 100644 index 6a93c522c..000000000 --- a/kingfisher_scrapy/spiders/ecuador_emergency.py +++ /dev/null @@ -1,30 +0,0 @@ -import scrapy - -from kingfisher_scrapy.base_spiders import SimpleSpider -from kingfisher_scrapy.util import components, handle_http_error - - -class EcuadorEmergency(SimpleSpider): - """ - Domain - Servicio Nacional de Contratación Pública - Bulk download documentation - https://portal.compraspublicas.gob.ec/sercop/data-estandar-ocds/ - """ - name = 'ecuador_emergency' - - # SimpleSpider - data_type = 'release_package' - - def start_requests(self): - url = 'https://datosabiertos.compraspublicas.gob.ec/OCDS/' - yield scrapy.Request(url, meta={'file_name': 'list.html'}, callback=self.parse_list) - - @handle_http_error - def parse_list(self, response): - html_urls = response.xpath('//a/@href').getall() - if html_urls: - # Each link contains different versions of SERCOP's emergency dataset, only the newest should be downloaded - # URL format: ./archivos/ocds-YYYY-MM-DD.json - html_urls.sort(reverse=True) - yield self.build_request(f'{response.request.url}{html_urls[0]}', formatter=components(-1)) diff --git a/kingfisher_scrapy/spiders/mexico_oaxaca_iaip.py b/kingfisher_scrapy/spiders/mexico_oaxaca_iaip.py deleted file mode 100644 index 53cc48526..000000000 --- a/kingfisher_scrapy/spiders/mexico_oaxaca_iaip.py +++ /dev/null @@ -1,24 +0,0 @@ -from kingfisher_scrapy.spiders.mexico_inai_base import MexicoINAIBase - - -class MexicoOaxacaIAIP(MexicoINAIBase): - """ - Domain - Instituto de Acceso a la Información Publica y Protección de Datos Personales del Estado de Oaxaca (IAIPOXACA) - Spider arguments - from_date - Download only data from this year onward (YYYY format). - If ``until_date`` is provided, defaults to '2021'. - until_date - Download only data until this year (YYYY format). - If ``from_date`` is provided, defaults to the current year. - API documentation - http://contratacionesabiertas-iaipoaxaca-org.mx:4000/contratacionesabiertas/datosabiertos - """ - name = 'mexico_oaxaca_iaip' - - # BaseSpider - default_from_date = '2021' - - # MexicoINAIBase - base_url = 'http://contratacionesabiertas-iaipoaxaca-org.mx:3000' diff --git a/kingfisher_scrapy/spiders/nicaragua_solid_waste.py b/kingfisher_scrapy/spiders/nicaragua_solid_waste.py deleted file mode 100644 index 0d6d2ae4c..000000000 --- a/kingfisher_scrapy/spiders/nicaragua_solid_waste.py +++ /dev/null @@ -1,27 +0,0 @@ -from kingfisher_scrapy.base_spiders import SimpleSpider -from kingfisher_scrapy.util import components - - -class NicaraguaSolidWaste(SimpleSpider): - """ - Domain - Solid Waste Mitigation Platform (SWMP) - Spider arguments - from_date - Download only data from this date onward (YYYY-MM-DD format). Defaults to '2000-01-01'. - until_date - Download only data until this date (YYYY-MM-DD format). Defaults to today. - """ - name = 'nicaragua_solid_waste' - - # BaseSpider - default_from_date = '2000-01-01' - date_required = True - - # SimpleSpider - data_type = 'release_package' - - def start_requests(self): - url = f'http://www.gekoware.com/swmp/api/ocds/{self.from_date.strftime("%Y%m%d")}/' \ - f'{self.until_date.strftime("%Y%m%d")}' - yield self.build_request(url, formatter=components(-2)) diff --git a/kingfisher_scrapy/spiders/nigeria_budeshi_records.py b/kingfisher_scrapy/spiders/nigeria_budeshi_records.py deleted file mode 100644 index 9ad3057ec..000000000 --- a/kingfisher_scrapy/spiders/nigeria_budeshi_records.py +++ /dev/null @@ -1,18 +0,0 @@ -from kingfisher_scrapy.spiders.nigeria_budeshi_base import NigeriaBudeshiBase -from kingfisher_scrapy.util import components - - -class NigeriaBudeshiRecords(NigeriaBudeshiBase): - """ - Domain - Budeshi Nigeria - API documentation - https://budeshi.ng/Api - """ - name = 'nigeria_budeshi_records' - - # SimpleSpider - data_type = 'record_package' - - def build_urls(self, project): - yield self.build_request(f'{self.base_url}record/{project["id"]}', formatter=components(-2)) diff --git a/kingfisher_scrapy/spiders/nigeria_budeshi_releases.py b/kingfisher_scrapy/spiders/nigeria_budeshi_releases.py deleted file mode 100644 index dcfca07fa..000000000 --- a/kingfisher_scrapy/spiders/nigeria_budeshi_releases.py +++ /dev/null @@ -1,34 +0,0 @@ -from kingfisher_scrapy.spiders.nigeria_budeshi_base import NigeriaBudeshiBase -from kingfisher_scrapy.util import components - - -class NigeriaBudeshiReleases(NigeriaBudeshiBase): - """ - Domain - Budeshi Nigeria - API documentation - https://budeshi.ng/Api - """ - name = 'nigeria_budeshi_releases' - - # BaseSpider - skip_pluck = 'Already covered (see code for details)' # nigeria*_budeshi_records - - # SimpleSpider - data_type = 'release_package' - - def build_urls(self, project): - for tag in ('planning', 'tender', 'award', 'contract'): - yield self.build_request(f'{self.base_url}releases/{project["id"]}/{tag}', formatter=components(-2)) - - def parse(self, response): - data = response.json() - # Some responses include a release list with null objects, e.g.: - # - # "releases": [ - # null - # ] - if data['releases'] == [None]: - yield self.build_file_error_from_response(response, errors=data) - else: - yield from super().parse(response) diff --git a/kingfisher_scrapy/spiders/nigeria_kaduna_state_budeshi_records.py b/kingfisher_scrapy/spiders/nigeria_kaduna_state_budeshi_records.py deleted file mode 100644 index c4489ccf4..000000000 --- a/kingfisher_scrapy/spiders/nigeria_kaduna_state_budeshi_records.py +++ /dev/null @@ -1,14 +0,0 @@ -from kingfisher_scrapy.spiders.nigeria_budeshi_records import NigeriaBudeshiRecords - - -class NigeriaKadunaStateBudeshiRecords(NigeriaBudeshiRecords): - """ - Domain - Nigeria - Kaduna State - API documentation - https://kadppaocds.azurewebsites.net/api - """ - name = 'nigeria_kaduna_state_budeshi_records' - - # NigeriaBudeshiBase - base_url = 'https://kadppaocds.azurewebsites.net/api/' diff --git a/kingfisher_scrapy/spiders/nigeria_kaduna_state_budeshi_releases.py b/kingfisher_scrapy/spiders/nigeria_kaduna_state_budeshi_releases.py deleted file mode 100644 index 233b4c0ac..000000000 --- a/kingfisher_scrapy/spiders/nigeria_kaduna_state_budeshi_releases.py +++ /dev/null @@ -1,14 +0,0 @@ -from kingfisher_scrapy.spiders.nigeria_budeshi_releases import NigeriaBudeshiReleases - - -class NigeriaKadunaStateBudeshiReleases(NigeriaBudeshiReleases): - """ - Domain - Nigeria - Kaduna State - API documentation - https://kadppaocds.azurewebsites.net/api - """ - name = 'nigeria_kaduna_state_budeshi_releases' - - # NigeriaBudeshiBase - base_url = 'https://kadppaocds.azurewebsites.net/api/' diff --git a/kingfisher_scrapy/spiders/tanzania_zabuni.py b/kingfisher_scrapy/spiders/tanzania_zabuni.py deleted file mode 100644 index 2b23ac496..000000000 --- a/kingfisher_scrapy/spiders/tanzania_zabuni.py +++ /dev/null @@ -1,37 +0,0 @@ -import scrapy - -from kingfisher_scrapy.base_spiders import SimpleSpider -from kingfisher_scrapy.util import components, handle_http_error - - -class TanzaniaZabuni(SimpleSpider): - """ - Domain - Tanzania Zabuni - API documentation - https://zabuni.co.tz/docs - """ - name = 'tanzania_zabuni' - download_delay = 1 # to avoid API 429 error "too many request" - - # SimpleSpider - data_type = 'release_package' - - url_prefix = 'https://app.zabuni.co.tz/api/releases/' - - def start_requests(self): - stages = ['tender', 'award', 'contract'] - for stage in stages: - yield scrapy.Request( - f'{self.url_prefix}{stage}', - meta={'file_name': 'list.json', 'stage': stage}, - callback=self.parse_list - ) - - @handle_http_error - def parse_list(self, response): - for release in response.json()['releases']: - yield self.build_request( - f'{self.url_prefix}{release["ocid"]}/{response.request.meta["stage"]}', - formatter=components(-2) - ) From d92eadae1a2f38345f75fb16c554d3e33c76b262 Mon Sep 17 00:00:00 2001 From: James McKinney <26463+jpmckinney@users.noreply.github.com> Date: Tue, 13 Dec 2022 11:33:36 -0500 Subject: [PATCH 2/2] docs: Add history entry --- docs/history.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/history.rst b/docs/history.rst index b09f47032..95949231c 100644 --- a/docs/history.rst +++ b/docs/history.rst @@ -3,6 +3,7 @@ Lapsed spiders This page records the spiders that were available but stopped publishing and therefore where removed from Kingfisher Collect, since January 2022: +- 2022-12-13: `ecuador_emergency, mexico_oaxaca_iaip, nicaragua_solid_waste, nigeria_budeshi_*, nigeria_kaduna_state_budeshi_*, tanzania_zabuni `__ - 2022-04-20: `afghanistan_*, indonesia_bandung `__ - 2022-01-27: `moldova_positive_initiative `__