diff --git a/process/analysis.py b/process/analysis.py index c5d33723..fd08f3ec 100644 --- a/process/analysis.py +++ b/process/analysis.py @@ -27,34 +27,40 @@ def archive_parameters(r, settings): 'project': settings, r.codename: r.config, } - - if os.path.isfile(f'{r.config["region_dir"]}/_parameters.yml'): + r = Region(r.codename) + parameters_exists = os.path.isfile( + f'{r.config["region_dir"]}/_parameters.yml', + ) + if parameters_exists: with open(f'{r.config["region_dir"]}/_parameters.yml') as f: saved_parameters = yaml.safe_load(f) - if ( - current_parameters['project'] == saved_parameters['project'] - and current_parameters[r.codename] == saved_parameters[r.codename] - ): - print_autobreak( - f"The copy of region and project parameters from a previous analysis dated {saved_parameters['date'].replace('_',' at ')} saved in the output directory as _parameters_{saved_parameters['date']}.yml matches the current configuration parameters and will be retained.\n\n", - ) - else: - shutil.copyfile( - f'{r.config["region_dir"]}/_parameters.yml', - f'{r.config["region_dir"]}/_parameters_{saved_parameters["date"]}.yml', - ) - with open(f'{r.config["region_dir"]}/_parameters.yml', 'w') as f: - yaml.safe_dump( - current_parameters, - f, - default_style=None, - default_flow_style=False, - sort_keys=False, - width=float('inf'), - ) - print_autobreak( - f"Project or region parameters from a previous analysis dated {saved_parameters['date'].replace('_',' at ')} appear to have been modified. The previous parameter record file has been copied to the output directory as _parameters_{saved_parameters['date']}.yml, while the current ones have been saved as _parameters.yml.\n", + else: + saved_parameters = None + if ( + saved_parameters is not None + and current_parameters['project'] == saved_parameters['project'] + and current_parameters[r.codename] == saved_parameters[r.codename] + ): + print_autobreak( + f"The copy of region and project parameters from a previous analysis dated {saved_parameters['date'].replace('_',' at ')} saved in the output directory as _parameters_{saved_parameters['date']}.yml matches the current configuration parameters and will be retained.\n\n", + ) + elif saved_parameters is not None: + shutil.copyfile( + f'{r.config["region_dir"]}/_parameters.yml', + f'{r.config["region_dir"]}/_parameters_{saved_parameters["date"]}.yml', + ) + with open(f'{r.config["region_dir"]}/_parameters.yml', 'w') as f: + yaml.safe_dump( + current_parameters, + f, + default_style=None, + default_flow_style=False, + sort_keys=False, + width=float('inf'), ) + print_autobreak( + f"Project or region parameters from a previous analysis dated {saved_parameters['date'].replace('_',' at ')} appear to have been modified. 
The previous parameter record file has been copied to the output directory as _parameters_{saved_parameters['date']}.yml, while the current ones have been saved as _parameters.yml.\n", + ) else: with open(f'{r.config["region_dir"]}/_parameters.yml', 'w') as f: yaml.safe_dump( @@ -156,7 +162,6 @@ def main(): except IndexError: codename = None r = Region(codename) - r.run_data_checks() r.analysis() diff --git a/process/configuration/assets/Example image of a vibrant, walkable, urban neighbourhood - landscape.jpg b/process/configuration/assets/Example image of a vibrant, walkable, urban neighbourhood - landscape.jpg index 2c893feb..17ac3f20 100644 Binary files a/process/configuration/assets/Example image of a vibrant, walkable, urban neighbourhood - landscape.jpg and b/process/configuration/assets/Example image of a vibrant, walkable, urban neighbourhood - landscape.jpg differ diff --git a/process/configuration/assets/Example image of a vibrant, walkable, urban neighbourhood - square.jpg b/process/configuration/assets/Example image of a vibrant, walkable, urban neighbourhood - square.jpg index 75bae4ed..f61a08e4 100644 Binary files a/process/configuration/assets/Example image of a vibrant, walkable, urban neighbourhood - square.jpg and b/process/configuration/assets/Example image of a vibrant, walkable, urban neighbourhood - square.jpg differ diff --git a/process/configuration/assets/region_template.yml b/process/configuration/assets/region_template.yml index 9f6232c1..1b4b327d 100644 --- a/process/configuration/assets/region_template.yml +++ b/process/configuration/assets/region_template.yml @@ -203,19 +203,19 @@ urban_region: EL_AV_ALS: Units: metres above sea level Unit description: metres above sea level - Description: The average elevation for the urban centre + Description: The average elevation estimated within the spatial domain of the Urban Centre, and expressed in metres above sea level (MASL) (EORC & JAXA, 2017). E_KG_NM_LST: Units: List of climate classes Unit description: List of climate classes - Description: semi-colon separated list of names of Köppen-Geiger climate classes, intersecting with the spatial domain of the Urban Centre + Description: Semi-colon separated list of names of Köppen-Geiger climate classes, intersecting with the spatial domain of the Urban Centre (1986-2010) (Rubel et al., 2017). E_WR_T_14: Units: °C Unit description: Average temperature in Celsius degrees (°C) - Description: average temperature calculated from annual average estimates for time interval centred on the year 1990 (the interval spans from 1988 to 1991) within the spatial domain of the Urban Centre + Description: Average temperature calculated from annual average estimates for time interval centred on the year 2015 (the interval spans from 2012 to 2015) within the spatial domain of the Urban Centre, and expressed in Celsius degrees (°C) (Harris et al., 2014). 
E_WR_P_14: Units: mm - Unit description: the amount of rain per square meter in one hour (mm) - Description: average precipitations calculated from annual average estimates for time interval centred on the year 2015 (the interval spans from 2012 to 2015) within the spatial domain of the Urban Centre + Unit description: The amount of rain per square metre in one hour (mm) + Description: Average precipitation calculated from annual average estimates for time interval centred on the year 2015 (the interval spans from 2012 to 2015) within the spatial domain of the Urban Centre, and expressed in millimetres (mm, the amount of rain per square metre in one hour) (Harris et al., 2014). ## Query used to identify the specific urban region relevant for this region in the Urban Centres database ## GHS or other linkage of covariate data (GHS:variable='value', or path:variable='value' for a dataset with equivalently named fields defined in project parameters for air_pollution_covariates), e.g. GHS:UC_NM_MN=='Las Palmas de Gran Canaria' and CTR_MN_NM=='Spain' urban_query: @@ -233,10 +233,10 @@ country_gdp: # custom_destinations: ## name of file relative to project data directory # file: - ## category plain name - # dest_name: - ## category full name - # dest_name_full: + ## destination identifier/name + # name_field: + ## destination detailed name or description + # description_field: ## y coordinate # lat: ## x coordinate @@ -291,6 +291,9 @@ notes: ######### ## Reporting configuration (uncomment to modify) # reporting: + ## PDF report templates + # templates: + # - policy_spatial ## Set 'publication_ready' to True once you have checked results, updated the summary and are ready to publish; before then, it should be False. # publication_ready: False ## Once ready for publication it is recommended to register a DOI for your report, e.g. through figshare, zenodo or other repository @@ -316,6 +319,37 @@ notes: # country: ## After reviewing the results, update this summary text to contextualise your findings, and relate to external text and documents (e.g. using website hyperlinks). This text will be used in the report summary. # summary: + ## Contextual summary for study region in relevant languages. + ## The listed entry terms that follow each hyphen may also be translated for additional languages to provide summary and source prose. The context section will be used in new templates, under development as of July 2023. + # context: + ## A brief summary of region characteristics + # - Regional characterisation: + # - summary: + # - source: + ## Contextual information about the founding of the city, including relevant year(s) + # - City founding context: + # - summary: + # - source: + ## Brief summary of socio-economic conditions and equity considerations for this city + # - Socio-economic conditions: + # - summary: + # - source: + ## Outline relevant weather/seasonal patterns + # - Weather: + # - summary: + # - source: + ## What is the topography like? + # - Topography: + # - summary: + # - source: + ## Environmental disasters likely to be faced + # - Anticipated environmental disaster risks: + # - summary: + # - source: + ## Anything else of note + # - Additional contextual information: + # - summary: + # - source: ## Optionally, exceptions to the template can be specified here; this can be useful for additional translation customisation without modifying the report_configuration.xlsx file.
These phrases can incorporate translated phrases defined in the report configuration by enclosing them in curly braces, e.g. {this}, if 'this' has been defined as a phrase in the relevant language. See the example region for a demonstration of how this can be used. Sections from the example can be pasted here and modified as required, or the below example can be uncommented. ## exceptions: ## "English": diff --git a/process/configuration/regions/example_ES_Las_Palmas_2023.yml b/process/configuration/regions/example_ES_Las_Palmas_2023.yml index 68a45dcf..f2a2e1f1 100644 --- a/process/configuration/regions/example_ES_Las_Palmas_2023.yml +++ b/process/configuration/regions/example_ES_Las_Palmas_2023.yml @@ -200,19 +200,19 @@ urban_region: EL_AV_ALS: Units: metres above sea level Unit description: metres above sea level - Description: The average elevation for the urban centre + Description: The average elevation estimated within the spatial domain of the Urban Centre, and expressed in metres above sea level (MASL) (EORC & JAXA, 2017). E_KG_NM_LST: Units: List of climate classes Unit description: List of climate classes - Description: semi-colon separated list of names of Köppen-Geiger climate classes, intersecting with the spatial domain of the Urban Centre + Description: Semi-colon separated list of names of Köppen-Geiger climate classes, intersecting with the spatial domain of the Urban Centre (1986-2010) (Rubel et al., 2017). E_WR_T_14: Units: °C Unit description: Average temperature in Celsius degrees (°C) - Description: average temperature calculated from annual average estimates for time interval centred on the year 1990 (the interval spans from 1988 to 1991) within the spatial domain of the Urban Centre + Description: Average temperature calculated from annual average estimates for time interval centred on the year 2015 (the interval spans from 2012 to 2015) within the spatial domain of the Urban Centre, and expressed in Celsius degrees (°C) (Harris et al., 2014). E_WR_P_14: Units: mm - Unit description: the amount of rain per square meter in one hour (mm) - Description: average precipitations calculated from annual average estimates for time interval centred on the year 2015 (the interval spans from 2012 to 2015) within the spatial domain of the Urban Centre + Unit description: The amount of rain per square metre in one hour (mm) + Description: Average precipitation calculated from annual average estimates for time interval centred on the year 2015 (the interval spans from 2012 to 2015) within the spatial domain of the Urban Centre, and expressed in millimetres (mm, the amount of rain per square metre in one hour) (Harris et al., 2014). ## Query used to identify the specific urban region relevant for this region in the Urban Centres database ## GHS or other linkage of covariate data (GHS:variable='value', or path:variable='value' for a dataset with equivalently named fields defined in project parameters for air_pollution_covariates), e.g.
GHS:UC_NM_MN=='Las Palmas de Gran Canaria' and CTR_MN_NM=='Spain' urban_query: GHS:UC_NM_MN=='Las Palmas de Gran Canaria' and CTR_MN_NM=='Spain' @@ -229,10 +229,10 @@ country_gdp: # custom_destinations: ## name of file relative to project data directory # file: - ## category plain name - # dest_name: - ## category full name - # dest_name_full: + ## destination identifier/name + # name_field: + ## destination detailed name or description + # description_field: ## y coordinate # lat: ## x coordinate @@ -274,21 +274,22 @@ notes: ######### ## Reporting configuration reporting: + # PDF report templates + templates: + - policy_spatial ## Set 'publication_ready' to True once you have checked results, updated the summary and are ready to publish; before then, it should be False. publication_ready: False ## Once ready for publication it is recommended to register a DOI for your report, e.g. through figshare, zenodo or other repository doi: images: - ## Store images in the process/configuration/assets folder. - ## Update file name, description and credit as required. 1: file: Example image of a vibrant, walkable, urban neighbourhood - landscape.jpg description: Example image of a vibrant, walkable, urban neighbourhood with diverse people using active modes of transport and a tram (replace with a photograph, customised in region configuration) - credit: Carl Higgs, Bing Image Creator, 2023 + credit: "Feature an image inspiring healthy, sustainable urban design from your city (.jpg, 2100px by 1000px), crediting the source, e.g.: Carl Higgs, Bing Image Creator, 2023" 2: file: Example image of a vibrant, walkable, urban neighbourhood - square.jpg description: Example image of a vibrant, walkable, urban neighbourhood with diverse people using active modes of transport and a tram (replace with a photograph, customised in region configuration) - credit: Carl Higgs, Bing Image Creator, 2023 + credit: "Feature an image inspiring healthy, sustainable urban design from your city (.jpg, 1000px by 1000px), crediting the source, e.g.: Carl Higgs, Bing Image Creator, 2023" ## Languages configuration languages: ## Add a list of languages as required. Languages listed should correspond to columns in the _report_configuration.xlsx file 'languages' worksheet. New languages can be added, although some may require additional fonts. Some languages may not be supported (e.g. complex scripts like Tamil and Thai may not be supported by the report template and require manual edits). @@ -299,25 +300,56 @@ reporting: country: Spain ## After reviewing the results, update this summary text to contextualise your findings, and relate to external text and documents (e.g. using website hyperlinks). This text will be used in the report summary. summary: | - After reviewing the results, update this summary text to contextualise your findings, and relate to external text and documents (e.g. using website hyperlinks). + After reviewing results for your city, provide a contextualised summary by adding or modifying the "summary" exception text for each relevant language using the reporting exceptions in the region configuration file. Relate findings to external references and documents for your city using hyperlinks. + ## Contextual summary for study region spatial report. Users may choose to translate these entries (following the hyphen) for languages configured for their city. Remember, if the text includes a colon (":"), put quotes around it so that it is explicitly understood as text.
+ context: + # A brief summary of region characteristics + - Regional characterisation: + # For example, for Las Palmas one aspect of interest could be: Vegetation biome characterised as Mediterranean Forests, Woodlands, and Scrub (Olson et al., 2001). + - summary: "Describe details that are important for understanding your city's context" + - source: Olson et al., 2001 cited in Global Human Settlements Layer Urban Centres Database (r2019a) + # Contextual information about the founding of the city, including relevant year(s) + - City founding context: + - summary: Edit these details in the region configuration file. + - source: + # Brief summary of socio-economic conditions and equity considerations for this city + - Socio-economic conditions: + - summary: Highlight important dynamics relating to equity and socio-economic contexts. + - source: + # Outline relevant weather/seasonal patterns + - Weather: + - summary: Desert (arid), and Hot arid, with annual average (2012-15) temperature of 20°C and 260mm of precipitation. + - source: Rubel et al., 2017 and Harris et al., 2014 cited in Global Human Settlements Layer Urban Centres Database (r2019a) + # What is the topography like? + - Topography: + - summary: Is your city hilly? Provide some detail. Las Palmas has a mean elevation of 106 metres above sea level. + - source: EORC & JAXA, 2017 cited in Global Human Settlements Layer Urban Centres Database (r2019a) + # Environmental disasters likely to be faced + - Anticipated environmental disaster risks: + - summary: What are important considerations for the future of your city? + - source: + # Anything else of note + - Additional contextual information: + - summary: "Provide more detail and link to resources, like https://healthysustainablecities.org" + - source: Spanish - Spain: name: Las Palmas de Gran Canaria country: España summary: | - Después de revisar los resultados, actualice este texto de resumen para contextualizar sus hallazgos y relacionarlo con textos y documentos externos (por ejemplo, utilizando hipervínculos de sitios web). + Después de revisar los resultados de su ciudad, proporcione un resumen contextualizado agregando o modificando el texto de excepción de "resumen" para cada idioma relevante utilizando las excepciones de informes en el archivo de configuración de la región. Relacione los hallazgos con referencias externas y documentos de su ciudad mediante hipervínculos. Chinese - Simplified: name: 大加那利岛拉斯帕尔马斯 country: 西班牙 summary: | - 查看结果后,更新此摘要文本以将您的发现置于上下文中,并与外部文本和文档相关(例如使用网站超链接)。 + 查看您所在城市的结果后,通过使用区域配置文件中的报告例外添加或修改每种相关语言的“摘要”例外文本来提供上下文摘要。 使用超链接将调查结果与您所在城市的外部参考和文档相关联。 ## Optionally, exceptions to the template can be specified here; this can be useful for additional translation customisation without modifying the report_configuration.xlsx file. These phrases can incorporate translated phrases defined in the report configuration by enclosing them in curly braces, e.g. {this}, if 'this' has been defined as a phrase in the relevant language. See the example region for a demonstration of how this can be used. Sections from the example can be pasted here and modified as required, or the below example can be uncommented. exceptions: "English": - 'local_collaborators_names': 'Add your names here, or modify authors in config.yml and remove this line' + 'local_collaborators_names': 'Add your names here by adding an exception entry for "local_collaborators_names" for each language in your study region configuration file, or by modifying "authors" in config.yml.'
"Spanish - Spain": - 'local_collaborators_names': 'Agregue sus nombres aquí, o modifique los autores en config.yml y elimine esta línea' + 'local_collaborators_names': 'Agregue sus nombres aquí agregando una entrada de excepción para "local_collaborators_names" para cada idioma en el archivo de configuración de su región de estudio, o modificando "autores" en config.yml.' 'citation_doi': '{local_collaborators_names}. 2022. {title_city} — {title_series_line1} {title_series_line2} ({city}, {country} — Healthy and Sustainable City Indicators Report. Traducción al español (España): {translation_names}). {city_doi}' "Chinese - Simplified": - 'local_collaborators_names': '在此处添加您的姓名,或在 config.yml 中修改作者并删除此行' + 'local_collaborators_names': '通过在您的研究区域配置文件中为每种语言添加“local_collaborators_names”的例外条目,或通过修改 config.yml 中的“authors”,在此处添加您的姓名。' 'citation_doi': '{local_collaborators_names}. 2022. {title_city} — {title_series_line1} {title_series_line2} ({city}, {country} — Healthy and Sustainable City Indicators Report: Comparisons with 25 cities internationally. {language} {translation}: {translation_names}). {city_doi}' ######### diff --git a/process/configuration/templates/_report_configuration.xlsx b/process/configuration/templates/_report_configuration.xlsx index 43a90837..7274a385 100644 Binary files a/process/configuration/templates/_report_configuration.xlsx and b/process/configuration/templates/_report_configuration.xlsx differ diff --git a/process/data/urban_regions/Example/Las Palmas de Gran Canaria - GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg b/process/data/urban_regions/Example/Las Palmas de Gran Canaria - GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg index d240486f..460a7f94 100644 Binary files a/process/data/urban_regions/Example/Las Palmas de Gran Canaria - GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg and b/process/data/urban_regions/Example/Las Palmas de Gran Canaria - GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg differ diff --git a/process/data/urban_regions/Example/readme_V1_2.txt b/process/data/urban_regions/Example/readme_V1_2.txt new file mode 100644 index 00000000..d01d0606 --- /dev/null +++ b/process/data/urban_regions/Example/readme_V1_2.txt @@ -0,0 +1,58 @@ +** Note: the geopackage file Las Palmas de Gran Canaria - GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg contains a single feature excerpt from the GHSL UCDB R2019A v1.2 for the Spanish city of Las Palmas de Gran Canaria and is intended for an example demonstration of the Global Healthy and Sustainable City Indicators tool. The readme information below pertains to the source dataset (https://ghsl.jrc.ec.europa.eu/ghs_stat_ucdb2015mt_r2019a.php). Carl Higgs, 25 July 2023. 
** + + +Name GHS Urban Centre Database 2015, multitemporal and multidimensional attributes, R2019A +Dataset ID GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2 +Version V1_2 +Data Last Update 07/04/2020 +Alternative name (with version) GHS Urban Centre Database 2015, Public Release 2019, Version 1.2 +Acronym GHS-UCDB 2015 R2019 V1.2 +Authors Florczyk, Aneta J.; Corbane, Christina; Schiavina, Marcello; Pesaresi, Martino; Maffenini, Luca; Melchiorri, Michele; Politis, Panagiotis; Sabo, Filip; Freire, Sergio; Ehrlich, Daniele; Kemper, Thomas; Tommasi, Pierpaolo; Airaghi, Donato; Zanchetta, Luigi +Document Version V1.2 +Document Last Update 07/04/2020 + +Contact point JRC-GHSL@EC.EUROPA.EU GHSL project related questions + JRC-GHSL-DATA@EC.EUROPA.EU GHSL data related questions + +What's new in V1.2 + Fields update: + - E_EC2E_E75, E_EC2E_E90, E_EC2E_E00, E_EC2E_E15, E_EC2E_R75, E_EC2E_R90, E_EC2E_R00, E_EC2E_R15, E_EC2E_I75, E_EC2E_I90, E_EC2E_I00, E_EC2E_I15, E_EC2E_T75, E_EC2E_T90, E_EC2E_T00, E_EC2E_T15, E_EC2E_A75, E_EC2E_A90, E_EC2E_A00, E_EC2E_A15, E_EC2O_E75, E_EC2O_E90, E_EC2O_E00, E_EC2O_E15, E_EC2O_R75, E_EC2O_R90, E_EC2O_R00, E_EC2O_R15, E_EC2O_I75, E_EC2O_I90, E_EC2O_I00, E_EC2O_I15, E_EC2O_T75, E_EC2O_T90, E_EC2O_T00, E_EC2O_T15, E_EC2O_A75, E_EC2O_A90, E_EC2O_A00, E_EC2O_A15, E_EPM2_E75, E_EPM2_E90, E_EPM2_E00, E_EPM2_E15, E_EPM2_R75, E_EPM2_R90, E_EPM2_R00, E_EPM2_R15, E_EPM2_I75, E_EPM2_I90, E_EPM2_I00, E_EPM2_I15, E_EPM2_T75, E_EPM2_T90, E_EPM2_T00, E_EPM2_T15, E_EPM2_A75, E_EPM2_A90, E_EPM2_A00, E_EPM2_A15 (new values updated) + Output of EDGARv5.0 (Crippa et al., 2019). Data of 2012 are substituted with 2015. Compared to the emissions provided by EDGAR v4.3.2 (used V1.0 and V1.1) new population-based spatial proxies have been developed using the GHSL Data Package 2019 (Florczyk et al., 2019) + + Fixed issue of datatype for value fields + +What's new in V1.1 + Fields update: + - UC_NM_MN, UC_NM_LST, UC_NM_SRC (new inputs and algorithm; see section 2.4.4 in http://ghsl.jrc.ec.europa.eu/documents/GHSL_Data_Package_2019.pdf) + - TT2CC (fixed bug in the algorithm) + - E_GR_AV90, E_GR_AV00, E_GR_AV14 (fixed data export to excel and vector attribute list) + + Vector format to open source .gpkg (GeoPackage) + + Tables encoding in UTF-8 + +Disclaimer + " The JRC data are provided ""as is"" and ""as available"" in conformity with the JRC Data Policy and the Commission Decision on reuse of Commission documents (2011/833/EU). Although the JRC guarantees its best effort in assuring quality when publishing these data, it provides them without any warranty of any kind, either express or implied, including, but not limited to, any implied warranty against infringement of third parties' property rights, or merchantability, integration, satisfactory quality and fitness for a particular purpose. The JRC has no obligation to provide technical support or remedies for the data. The JRC does not represent or warrant that the data will be error free or uninterrupted, or that all non-conformities can or will be corrected, or that any data are accurate or complete, or that they are of a satisfactory technical or scientific quality. The JRC or as the case may be the European Commission shall not be held liable for any direct or indirect, incidental, consequential or other damages, including but not limited to the loss of data, loss of profits, or any other financial loss arising from the use of the JRC data, or inability to use them, even if the JRC is notified of the possibility of such damages.
+ The designations employed and the presentation of material on this map do not imply the expression of any opinion whatsoever on the part of the European Union concerning the legal status of any country, territory, city or area or of its authorities, or concerning the delimitation of its frontiers or boundaries. Kosovo: This designation is without prejudice to positions on status, and is in line with UNSCR 1244/1999 and the ICJ Opinion on the Kosovo declaration of independence. Palestine: This designation shall not be construed as recognition of a State of Palestine and is without prejudice to the individual positions of the Member States on this issue. " + +Licence + European Commission Reuse and Copyright Notice + European Union, 1995-2019 + Reuse is authorised, provided the source is acknowledged. The reuse policy of the European Commission is implemented by a Decision of 12 December 2011. + Disclaimer + Unless the following would not be permitted or valid under applicable law, the following applies to the data/information provided by the JRC: + The JRC data are provided "as is" and "as available" without warranty of any kind, either express or implied, including, but not limited to, any implied warranty against infringement of third parties' property rights, or merchantability, integration, absence of latent or other defects, satisfactory quality and fitness for a particular purpose. The JRC data do not constitute professional or legal advice (if you need specific advice, you should always consult a suitably qualified professional). + The JRC has no obligation to provide technical support or remedies for the data. The JRC does not represent or warrant that the data will be error free or uninterrupted, or that all non-conformities can or will be corrected, or that any data are accurate or complete, or that they are of a satisfactory technical or scientific quality. + The JRC or as the case may be the European Commission shall not be held liable for any direct or indirect, incidental, consequential or other damages, including but not limited to the loss of data, loss of profits, or any other financial loss arising from the use of the JRC data, or inability to use them, even if the JRC is notified of the possibility of such damages. + Decision of 12 December 2011. + +Dataset Citation + Florczyk, A.J., Corbane, C., Schiavina, M., Pesaresi, M., Maffenini, L., Melchiorri, M., Politis, P., Sabo, F., Freire, S., Ehrlich, D., Kemper, T., Tommasi, P., Airaghi, D. and L. Zanchetta. 2019. GHS Urban Centre Database 2015, multitemporal and multidimensional attributes, R2019A. European Commission, Joint Research Centre (JRC) [Dataset] PID: http://data.europa.eu/89h/53473144-b88c-44bc-b4a3-4583ed1f547e + +Related Resources + Description of the GHS-UCDB 2015 R2019 V1.0 + Florczyk, A.J., Melchiorri, M., Corbane, C., Schiavina, M., Maffenini, M., Pesaresi, M., Politis, P., Sabo, S., Freire, S., Ehrlich, D., Kemper, T., Tommasi, P., Airaghi, D. and L. Zanchetta, Description of the GHS Urban Centre Database 2015, Public Release 2019, Version 1.0, Publications Office of the European Union, Luxembourg, 2019, ISBN 978-92-79-99753-2, doi:10.2760/037310, JRC115586. + Atlas of the Human Planet 2018 + European Commission, Joint Research Centre, Atlas of the Human Planet 2018. A world of cities. EUR 29497 EN, Publications Office of the European Union, Luxembourg, 2018, doi:10.2760/124503 + GHSL data (CR2018) used in production of the database.
+ Florczyk, A.J., Ehrlich, D., Corbane, C., Freire, S., Kemper, T., Melchiorri, M., Pesaresi, M., Politis, P., Schiavina, M., Zanchetta, L., Community pre-Release of GHS Data Package (GHS CR2018) in support to the GEO Human Planet Initiative, EUR 29466 EN, Publications Office of the European Union, Luxembourg, 2018, doi:10.2760/777868 diff --git a/process/generate.py b/process/generate.py index 4e999bdf..aad3f6b1 100644 --- a/process/generate.py +++ b/process/generate.py @@ -28,37 +28,7 @@ ) -def generate(r): - if type(r) == str: - codename = r - r = Region(codename) - else: - codename = r.codename - print(r.header) - r.config['codename'] = codename - r.config['__version__'] = __version__ - r.config['templates'] = ['web'] - r.config[ - 'report_configuration' - ] = './configuration/_report_configuration.xlsx' - r.config['folder_path'] = folder_path - r.config['date_hhmm'] = date_hhmm - r.config['authors'] = settings['documentation']['authors'] - with open(f"{r.config['region_dir']}/_parameters.yml") as f: - r.config['parameters'] = yaml.safe_load(f) - if not os.path.exists(r.config['region_dir']): - sys.exit( - f"\n\nProcessed resource folder for this city couldn't be located:" - f'\n[{r.config["region_dir"]}]' - '\nPlease ensure city has been successfully processed before continuing\n', - ) - # List existing generated resources - print('Analysis parameter summary text file') - print(' _parameters.yml') - print('\nAnalysis log text file') - print(f" __{r.config['name']}__{codename}_processing_log.txt") - print('\nData files') - print(f" {os.path.basename(r.config['gpkg'])}") +def export_indicators(r, gpkg=True, csv=True): custom_aggregations = r.config.pop('custom_aggregations', {}) tables = [f'indicators_{x}' for x in custom_aggregations] + [ r.config['city_summary'], @@ -83,36 +53,66 @@ def generate(r): print( f"The following tables were not found in the database, and so not exported: {', '.join(tables_not_in_database)} (please ensure processing has been completed to export these)", ) - postgis_to_geopackage( - r.config['gpkg'], - settings['sql']['db_host'], - settings['sql']['db_user'], - r.config['db'], - settings['sql']['db_pwd'], - [t for t in tables if t in r.tables], - ) - for layer in ['city', 'grid']: - if r.config[f'{layer}_summary'] in r.tables: + if gpkg: + if os.path.exists(r.config['gpkg']): print( - ' ' - + os.path.basename( - r.to_csv( - r.config[f'{layer}_summary'], - f"{r.config['region_dir']}/{r.codename}_{r.config[f'{layer}_summary']}.csv", - ), - ), + f" {r.config['gpkg'].replace(r.config['region_dir'],'')} (exists; delete or rename to re-generate)", ) - for layer in custom_aggregations: - if layer in r.tables: - print( - ' ' - + os.path.basename( - r.to_csv( - f'indicators_{layer}', - f"{r.config['region_dir']}/{r.codename}_indicators_{layer}.csv", - ), - ), + else: + print(f" {os.path.basename(r.config['gpkg'])}") + postgis_to_geopackage( + r.config['gpkg'], + settings['sql']['db_host'], + settings['sql']['db_user'], + r.config['db'], + settings['sql']['db_pwd'], + [t for t in tables if t in r.tables], ) + if csv: + for layer in ['city', 'grid'] + [x for x in custom_aggregations]: + if layer in ['city', 'grid']: + table = r.config[f'{layer}_summary'] + else: + table = f'indicators_{layer}' + if table in r.tables: + file = f"{r.config['region_dir']}/{r.codename}_{table}.csv" + if os.path.exists(file): + print( + f" {file.replace(r.config['region_dir'],'')} (exists; delete or rename to re-generate)", + ) + else: + print(' ' + os.path.basename(r.to_csv(table, file))) + 
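A minimal usage sketch for the new export_indicators helper above (an illustration, not part of the patch): it assumes ghsci and generate are importable, e.g. when run from the process directory, and uses the bundled example region codename.

    from ghsci import Region
    from generate import export_indicators  # assumed import path for process/generate.py

    r = Region('example_ES_Las_Palmas_2023')    # bundled example region
    export_indicators(r, gpkg=True, csv=False)  # export the GeoPackage only
    export_indicators(r)                        # default: GeoPackage plus city/grid/custom CSV summaries
    # Existing outputs are reported as '(exists; delete or rename to re-generate)'
    # rather than being overwritten.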
+ +def generate(r): + if type(r) == str: + codename = r + r = Region(codename) + else: + codename = r.codename + print(r.header) + r.config['codename'] = codename + r.config['__version__'] = __version__ + r.config['folder_path'] = folder_path + r.config['date_hhmm'] = date_hhmm + r.config['authors'] = settings['documentation']['authors'] + """List resources that have been generated for this study region.""" + if not os.path.exists(r.config['region_dir']): + sys.exit( + f"\n\nProcessed resource folder for this city couldn't be located:" + f'\n[{r.config["region_dir"]}]' + '\nPlease ensure city has been successfully processed before continuing\n', + ) + if os.path.exists(f"{r.config['region_dir']}/_parameters.yml"): + with open(f"{r.config['region_dir']}/_parameters.yml") as f: + r.config['parameters'] = yaml.safe_load(f) + print('\nAnalysis parameter summary text file') + print(' _parameters.yml') + if os.path.exists(r.log): + print('\nAnalysis log text file') + print(os.path.basename(r.log)) + print('\nData files') + export_indicators(r) # Generate data dictionary print('\nData dictionaries') required_assets = [ diff --git a/process/subprocesses/_05_compile_destinations.py b/process/subprocesses/_05_compile_destinations.py index 5e3eb2ee..69bd7ccc 100644 --- a/process/subprocesses/_05_compile_destinations.py +++ b/process/subprocesses/_05_compile_destinations.py @@ -14,26 +14,26 @@ from sqlalchemy import text -def custom_destination_setup(engine, r): +def custom_destination_setup(r): import pandas as pd df = pd.read_csv( - f'{r.config["region_dir"]}/{r.config["custom_destinations"]["file"]}', + f'/home/ghsci/process/data/{r.config["custom_destinations"]["file"]}', ) - df.to_sql('r.config["custom_destinations"]', engine, if_exists='replace') + df.to_sql('custom_destinations', r.engine, if_exists='replace') sql = f""" INSERT INTO destinations (dest_name,dest_name_full,geom) - SELECT {r.config["custom_destinations"]["dest_name"]}::text dest_name, - {r.config["custom_destinations"]["dest_name_full"]}::text dest_name_full, + SELECT "{r.config["custom_destinations"]["name_field"]}"::text dest_name, + "{r.config["custom_destinations"]["description_field"]}"::text dest_name_full, ST_Transform(ST_SetSRID(ST_Point( "{r.config["custom_destinations"]["lon"]}"::float, "{r.config["custom_destinations"]["lat"]}"::float), {r.config["custom_destinations"]["epsg"]}), - {crs['srid']} + {r.config['crs']['srid']} ) geom - FROM r.config["custom_destinations"]; + FROM custom_destinations; """ - with engine.begin() as connection: + with r.engine.begin() as connection: connection.execute(text(sql)) @@ -181,7 +181,7 @@ def compile_destinations(codename): and r.config['custom_destinations'] is not None and r.config['custom_destinations']['file'] is not None ): - custom_destination_setup(engine, r) + custom_destination_setup(r) create_destinations_indices = """ CREATE INDEX destinations_dest_name_idx ON destinations (dest_name); diff --git a/process/subprocesses/_09_urban_covariates.py b/process/subprocesses/_09_urban_covariates.py index 77e98000..33307d6b 100644 --- a/process/subprocesses/_09_urban_covariates.py +++ b/process/subprocesses/_09_urban_covariates.py @@ -9,6 +9,7 @@ import geopandas as gpd import ghsci +import numpy as np import pandas as pd from script_running_log import script_running_log from sqlalchemy import text diff --git a/process/subprocesses/_utils.py b/process/subprocesses/_utils.py index e426dee5..0823c962 100644 --- a/process/subprocesses/_utils.py +++ b/process/subprocesses/_utils.py @@ 
-10,10 +10,13 @@ import time from textwrap import wrap -import contextily as ctx -import fiona +# import contextily as ctx +# import fiona import geopandas as gpd import matplotlib as mpl +import matplotlib.font_manager as fm +import matplotlib.pyplot as plt +import matplotlib.ticker as ticker import numpy as np import pandas as pd from babel.numbers import format_decimal as fnum @@ -21,7 +24,8 @@ from fpdf import FPDF, FlexTemplate from mpl_toolkits.axes_grid1 import make_axes_locatable from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar -from mpl_toolkits.axes_grid1.inset_locator import inset_axes + +# from mpl_toolkits.axes_grid1.inset_locator import inset_axes # 'pretty' text wrapping as per https://stackoverflow.com/questions/37572837/how-can-i-make-python-3s-print-fit-the-size-of-the-command-prompt @@ -142,10 +146,10 @@ def get_valid_languages(config): no_language_warning = "No valid languages found in region configuration. This is required for report generation. A default parameterisation will be used for reporting in English. To further customise for your region and requirements, please add and update the reporting section in your region's configuration file." default_language = setup_default_language(config) configured_languages = pd.read_excel( - config['report_configuration'], sheet_name='languages', + config['reporting']['configuration'], sheet_name='languages', ).columns[2:] configured_fonts = pd.read_excel( - config['report_configuration'], sheet_name='fonts', + config['reporting']['configuration'], sheet_name='fonts', ) if config['reporting']['languages'] is None: print_autobreak(f'\nNote: {no_language_warning}') @@ -165,7 +169,7 @@ def get_valid_languages(config): print_autobreak( f"\nNote: Some languages specified in this region's configuration file ({', '.join(languages_not_configured)}) have not been set up with translations in the report configuration 'languages' worksheet. Reports will only be generated for those languages that have had prose translations set up ({', '.join(configured_languages)}).", ) - required_keys = {'country', 'summary', 'name'} + required_keys = {'country', 'summary', 'name', 'context'} languages_configured_have_required_keys = [ x for x in languages_configured if languages[x].keys() == required_keys ] @@ -220,6 +224,40 @@ def setup_default_language(config): 'name': config['name'], 'country': config['country'], 'summary': 'After reviewing the results, update this summary text to contextualise your findings, and relate to external text and documents (e.g. 
using website hyperlinks).', + 'context': [ + { + 'Regional characterisation': [ + {'summary': None}, + {'source': None}, + ], + }, + { + 'City founding context': [ + {'summary': None}, + {'source': None}, + ], + }, + { + 'Socio-economic conditions': [ + {'summary': None}, + {'source': None}, + ], + }, + {'Weather': [{'summary': None}, {'source': None}]}, + {'Topography': [{'summary': None}, {'source': None}]}, + { + 'Anticipated environmental disaster risks': [ + {'summary': None}, + {'source': None}, + ], + }, + { + 'Additional contextual information': [ + {'summary': None}, + {'source': None}, + ], + }, + ], }, } return languages @@ -228,6 +266,8 @@ def setup_default_language(config): def check_and_update_config_reporting_parameters(config): """Checks config reporting parameters and updates these if necessary.""" reporting_default = { + 'configuration': './configuration/_report_configuration.xlsx', + 'templates': ['policy_spatial'], 'publication_ready': False, 'doi': None, 'images': { @@ -252,13 +292,14 @@ def check_and_update_config_reporting_parameters(config): reporting = reporting_default.copy() else: reporting = config['reporting'].copy() - reporting['languages'] = get_valid_languages(config) for key in reporting_default.keys(): if key not in reporting.keys(): reporting[key] = reporting_default[key] print_autobreak( f"\nNote: Reporting parameter '{key}' not found in region configuration. Using default value of '{reporting_default[key]}'. To further customise for your region and requirements, please add and update the reporting section in your region's configuration file.", ) + config['reporting'] = reporting + reporting['languages'] = get_valid_languages(config) return reporting @@ -312,7 +353,7 @@ def generate_report_for_language( cmap, ) # instantiate template - for template in r.config['templates']: + for template in r.config['reporting']['templates']: print(f'\nReport ({template} PDF template; {language})') capture_return = generate_scorecard( r.config, @@ -323,14 +364,18 @@ def generate_report_for_language( template, font, ) + print(capture_return) else: - capture_return = ' - Skipped: This language has not been flagged for export in _report_configuration.xlsx (some languages such as Tamil may have features to their writing that currently are not supported, such as Devaganari conjuncts; perhaps for this reason it has not been flagged for export, or otherwise it has not been fully configured).' 
- print(capture_return) + print( + ' - Skipped: This language has not been flagged for export in _report_configuration.xlsx (some languages such as Tamil may have features to their writing that currently are not supported, such as Devanagari conjuncts; perhaps for this reason it has not been flagged for export, or otherwise it has not been fully configured).', + ) def get_and_setup_font(language, config): """Set up and return the font for a given language configuration.""" - fonts = pd.read_excel(config['report_configuration'], sheet_name='fonts') + fonts = pd.read_excel( + config['reporting']['configuration'], sheet_name='fonts', + ) if language.replace(' (Auto-translation)', '') in fonts.Language.unique(): fonts = fonts.loc[ fonts['Language'] == language.replace(' (Auto-translation)', '') @@ -338,12 +383,12 @@ else: fonts = fonts.loc[fonts['Language'] == 'default'].fillna('') main_font = fonts.File.values[0].strip() - mpl.font_manager.fontManager.addfont(main_font) - prop = mpl.font_manager.FontProperties(fname=main_font) - mpl.font_manager.findfont( + fm.fontManager.addfont(main_font) + prop = fm.FontProperties(fname=main_font) + fm.findfont( prop=prop, directory=main_font, rebuild_if_missing=True, ) - mpl.pyplot.rcParams['font.family'] = prop.get_name() + plt.rcParams['font.family'] = prop.get_name() font = fonts.Font.values[0] return font @@ -437,19 +482,21 @@ city_stats = compile_city_stats(gdf_city, indicators, phrases) if not os.path.exists(figure_path): os.mkdir(figure_path) - # Spatial access liveability profile - li_profile( - city_stats=city_stats, - title=phrases['Population % with access within 500m to...'], - cmap=cmap, - phrases=phrases, - path=f'{figure_path}/access_profile_{language}.jpg', - ) - print(f' figures/access_profile_{language}.jpg') - ## constrain extreme outlying walkability for representation - gdf_grid['all_cities_walkability'] = gdf_grid[ - 'all_cities_walkability' - ].apply(lambda x: -6 if x < -6 else (6 if x > 6 else x)) + # Access profile + file = f'{figure_path}/access_profile_{language}.jpg' + if os.path.exists(file): + print( + f" {file.replace(config['region_dir'],'')} (exists; delete or rename to re-generate)", + ) + else: + li_profile( + city_stats=city_stats, + title=phrases['Population % with access within 500m to...'], + cmap=cmap, + phrases=phrases, + path=file, + ) + print(f' figures/access_profile_{language}.jpg') # Spatial distribution maps spatial_maps = compile_spatial_map_info( indicators['report']['spatial_distribution_figures'], @@ -458,62 +505,95 @@ locale, language=language, ) + ## constrain extreme outlying walkability for representation + gdf_grid['all_cities_walkability'] = gdf_grid[ + 'all_cities_walkability' + ].apply(lambda x: -6 if x < -6 else (6 if x > 6 else x)) for f in spatial_maps: - spatial_dist_map( - gdf_grid, - column=f, - range=spatial_maps[f]['range'], - label=spatial_maps[f]['label'], - tick_labels=spatial_maps[f]['tick_labels'], - cmap=cmap, - path=f'{figure_path}/{spatial_maps[f]["outfile"]}', - phrases=phrases, - locale=locale, - ) - print(f" figures/{spatial_maps[f]['outfile']}") + file = f'{figure_path}/{spatial_maps[f]["outfile"]}' + if os.path.exists(file): + print( + f" {file.replace(config['region_dir'],'')} (exists; delete or rename to re-generate)", + ) + else: + spatial_dist_map( + gdf_grid, + column=f, + range=spatial_maps[f]['range'], + label=spatial_maps[f]['label'], + tick_labels=spatial_maps[f]['tick_labels'], +
cmap=cmap, + path=file, + phrases=phrases, + locale=locale, + ) + print(f" figures/{spatial_maps[f]['outfile']}") # Threshold maps for scenario in indicators['report']['thresholds']: - threshold_map( - gdf_grid, - column=indicators['report']['thresholds'][scenario]['field'], - scale=indicators['report']['thresholds'][scenario]['scale'], - comparison=indicators['report']['thresholds'][scenario][ - 'criteria' - ], - label=( - f"{phrases[indicators['report']['thresholds'][scenario]['title']]} ({phrases['density_units']})" - ), - cmap=cmap, - path=f"{figure_path}/{indicators['report']['thresholds'][scenario]['field']}_{language}.jpg", - phrases=phrases, - locale=locale, - ) - print( - f" figures/{indicators['report']['thresholds'][scenario]['field']}_{language}.jpg", - ) - # Policy ratings - policy_rating( - range=[0, 24], - score=city_policy['Presence_rating'], - comparison=indicators['report']['policy']['comparisons']['presence'], - label='', - comparison_label=phrases['25 city comparison'], - cmap=cmap, - locale=locale, - path=f'{figure_path}/policy_presence_rating_{language}.jpg', - ) - print(f' figures/policy_presence_rating_{language}.jpg') - policy_rating( - range=[0, 57], - score=city_policy['Checklist_rating'], - comparison=indicators['report']['policy']['comparisons']['quality'], - label='', - comparison_label=phrases['25 city comparison'], - cmap=cmap, - locale=locale, - path=f'{figure_path}/policy_checklist_rating_{language}.jpg', - ) - print(f' figures/policy_checklist_rating_{language}.jpg') + file = f"{figure_path}/{indicators['report']['thresholds'][scenario]['field']}_{language}.jpg" + if os.path.exists(file): + print( + f" {file.replace(config['region_dir'],'')} (exists; delete or rename to re-generate)", + ) + else: + threshold_map( + gdf_grid, + column=indicators['report']['thresholds'][scenario]['field'], + scale=indicators['report']['thresholds'][scenario]['scale'], + comparison=indicators['report']['thresholds'][scenario][ + 'criteria' + ], + label=( + f"{phrases[indicators['report']['thresholds'][scenario]['title']]} ({phrases['density_units']})" + ), + cmap=cmap, + path=file, + phrases=phrases, + locale=locale, + ) + print( + f" figures/{indicators['report']['thresholds'][scenario]['field']}_{language}.jpg", + ) + if any(['policy' in x for x in config['reporting']['templates']]): + # Policy ratings + file = f'{figure_path}/policy_presence_rating_{language}.jpg' + if os.path.exists(file): + print( + f" {file.replace(config['region_dir'],'')} (exists; delete or rename to re-generate)", + ) + else: + policy_rating( + range=[0, 24], + score=city_policy['Presence_rating'], + comparison=indicators['report']['policy']['comparisons'][ + 'presence' + ], + label='', + comparison_label=phrases['25 city comparison'], + cmap=cmap, + locale=locale, + path=file, + ) + print(f' figures/policy_presence_rating_{language}.jpg') + file = f'{figure_path}/policy_checklist_rating_{language}.jpg' + if os.path.exists(file): + print( + f" {file.replace(config['region_dir'],'')} (exists; delete or rename to re-generate)", + ) + else: + policy_rating( + range=[0, 57], + score=city_policy['Checklist_rating'], + comparison=indicators['report']['policy']['comparisons'][ + 'quality' + ], + label='', + comparison_label=phrases['25 city comparison'], + cmap=cmap, + locale=locale, + path=file, + ) + print(f' figures/policy_checklist_rating_{language}.jpg') return figure_path @@ -691,6 +771,8 @@ def li_profile( Expands on https://www.python-graph-gallery.com/web-circular-barplot-with-matplotlib -- A 
python code blog post by Yan Holtz, in turn expanding on work of Tomás Capretto and Tobias Stadler. """ + import matplotlib.colors as mpl_colors + figsize = (width, height) # Values for the x axis ANGLES = np.linspace( @@ -701,11 +783,11 @@ def li_profile( INDICATORS = city_stats['access'].index # Colours GREY12 = '#1f1f1f' - norm = mpl.colors.Normalize(vmin=0, vmax=100) + norm = mpl_colors.Normalize(vmin=0, vmax=100) COLORS = cmap(list(norm(VALUES))) # Initialize layout in polar coordinates textsize = 11 - fig, ax = mpl.pyplot.subplots( + fig, ax = plt.subplots( figsize=figsize, subplot_kw={'projection': 'polar'}, ) # Set background color to white, both axis and figure. @@ -791,7 +873,7 @@ def li_profile( bbox_to_anchor=(0.58 + np.cos(angle) / 2, 0.46 + np.sin(angle) / 2), ) fig.savefig(path, dpi=dpi) - mpl.pyplot.close(fig) + plt.close(fig) return path @@ -813,7 +895,7 @@ def spatial_dist_map( """Spatial distribution maps using geopandas geodataframe.""" figsize = (width, height) textsize = 14 - fig, ax = mpl.pyplot.subplots(figsize=figsize) + fig, ax = plt.subplots(figsize=figsize) ax.set_axis_off() divider = make_axes_locatable(ax) # Define 'divider' for the axes # Legend axes will be located at the 'bottom' of figure, with width '5%' of ax and @@ -844,7 +926,7 @@ def spatial_dist_map( multiplier=1000, units='kilometer', locale=locale, - fontproperties=mpl.font_manager.FontProperties(size=textsize), + fontproperties=fm.FontProperties(size=textsize), ) # north arrow add_localised_north_arrow(ax, text=phrases['north arrow']) @@ -854,13 +936,13 @@ def spatial_dist_map( if tick_labels is not None: # cax.set_xticks(cax.get_xticks().tolist()) # cax.set_xticklabels(tick_labels) - cax.xaxis.set_major_locator(mpl.ticker.MaxNLocator(len(tick_labels))) + cax.xaxis.set_major_locator(ticker.MaxNLocator(len(tick_labels))) ticks_loc = cax.get_xticks().tolist() - cax.xaxis.set_major_locator(mpl.ticker.FixedLocator(ticks_loc)) + cax.xaxis.set_major_locator(ticker.FixedLocator(ticks_loc)) cax.set_xticklabels(tick_labels) - mpl.pyplot.tight_layout() + plt.tight_layout() fig.savefig(path, dpi=dpi) - mpl.pyplot.close(fig) + plt.close(fig) return path @@ -881,7 +963,7 @@ def threshold_map( """Create threshold indicator map.""" figsize = (width, height) textsize = 14 - fig, ax = mpl.pyplot.subplots(figsize=figsize) + fig, ax = plt.subplots(figsize=figsize) ax.set_axis_off() divider = make_axes_locatable(ax) # Define 'divider' for the axes # Legend axes will be located at the 'bottom' of figure, with width '5%' of ax and @@ -910,17 +992,17 @@ def threshold_map( multiplier=1000, units='kilometer', locale=locale, - fontproperties=mpl.font_manager.FontProperties(size=textsize), + fontproperties=fm.FontProperties(size=textsize), ) # north arrow add_localised_north_arrow(ax, text=phrases['north arrow']) # axis formatting - cax.xaxis.set_major_formatter(mpl.ticker.EngFormatter()) + cax.xaxis.set_major_formatter(ticker.EngFormatter()) cax.tick_params(labelsize=textsize) cax.xaxis.label.set_size(textsize) - mpl.pyplot.tight_layout() + plt.tight_layout() fig.savefig(path, dpi=dpi) - mpl.pyplot.close(fig) + plt.close(fig) return path @@ -930,7 +1012,7 @@ def policy_rating( cmap, comparison=None, width=fpdf2_mm_scale(70), - height=fpdf2_mm_scale(15), + height=fpdf2_mm_scale(12), label='Policies identified', comparison_label='25 city median', locale='en', @@ -942,13 +1024,16 @@ def policy_rating( Applied in this context for policy presence and policy quality scores. 
""" + import matplotlib.cm as mpl_cm + import matplotlib.colors as mpl_colors + textsize = 14 - fig, ax = mpl.pyplot.subplots(figsize=(width, height)) + fig, ax = plt.subplots(figsize=(width, height)) fig.subplots_adjust(bottom=0) cmap = cmap - norm = mpl.colors.Normalize(vmin=range[0], vmax=range[1]) + norm = mpl_colors.Normalize(vmin=range[0], vmax=range[1]) fig.colorbar( - mpl.cm.ScalarMappable(norm=norm, cmap=cmap), + mpl_cm.ScalarMappable(norm=norm, cmap=cmap), cax=ax, orientation='horizontal', # shrink=0.9, pad=0, aspect=90 @@ -957,7 +1042,7 @@ def policy_rating( if comparison is None: ax.xaxis.set_ticks([]) else: - ax.xaxis.set_major_locator(mpl.ticker.FixedLocator([comparison])) + ax.xaxis.set_major_locator(ticker.FixedLocator([comparison])) # ax.set_xticklabels([comparison_label]) ax.set_xticklabels(['']) ax.tick_params(labelsize=textsize) @@ -979,7 +1064,7 @@ def policy_rating( # Format City ticks ax_city = ax.twiny() ax_city.set_xlim(range) - ax_city.xaxis.set_major_locator(mpl.ticker.FixedLocator([score])) + ax_city.xaxis.set_major_locator(ticker.FixedLocator([score])) ax_city.plot( score, 1, @@ -995,19 +1080,20 @@ def policy_rating( [f"{sep}{str(score).rstrip('0').rstrip('.')}/{range[1]}{label}"], ) ax_city.tick_params(labelsize=textsize) - # return figure with final styling - xlabel = f"{comparison_label} ({fnum(comparison,'0.0',locale)})" - ax.set_xlabel( - xlabel, labelpad=0.5, fontsize=textsize, - ) - mpl.pyplot.tight_layout() + if comparison is not None: + # return figure with final styling + xlabel = f"{comparison_label} ({fnum(comparison,'0.0',locale)})" + ax.set_xlabel( + xlabel, labelpad=0.5, fontsize=textsize, + ) + plt.tight_layout() fig.savefig(path, dpi=dpi) - mpl.pyplot.close(fig) + plt.close(fig) return path def pdf_template_setup( - config, template='template_web', font=None, language='English', + config, template, font=None, language='English', ): """ Takes a template xlsx sheet defining elements for use in fpdf2's FlexTemplate function. 
@@ -1022,7 +1108,7 @@ def pdf_template_setup( """ # read in elements elements = pd.read_excel( - config['report_configuration'], sheet_name=template, + config['reporting']['configuration'], sheet_name=template, ) document_pages = elements.page.unique() # Conditional formatting to help avoid inappropriate line breaks and gaps in Tamil and Thai @@ -1049,11 +1135,13 @@ def pdf_template_setup( planes = {'foreground': '000000', 'background': None} for i, element in enumerate(elements): for plane in planes: - if elements[i][plane] is not None: + if elements[i][plane] not in [None, 'None', 0]: # this assumes a hexadecimal string without the 0x prefix elements[i][plane] = int(elements[i][plane], 16) - else: + elif plane == 'foreground': elements[i][plane] = int(planes[plane], 16) + else: + elements[i][plane] = None pages = {} for page in document_pages: pages[f'{page}'] = [x for x in elements if x['page'] == page] @@ -1067,16 +1155,7 @@ def format_pages(pages, phrases): if item['name'] in phrases: try: pages[page][i]['text'] = phrases[item['name']].format( - city=phrases['city_name'], - city_name=phrases['city_name'], - country=phrases['country'], - study_doi='https://healthysustainablecities.org', - citation_series=phrases['citation_series'], - citation_doi=phrases['citation_doi'], - citation_population=phrases['citation_population'], - citation_boundaries=phrases['citation_boundaries'], - citation_features=phrases['citation_features'], - citation_colour=phrases['citation_colour'], + **phrases, ) except Exception: pages[f'{page}'][i]['text'] = phrases[item['name']] @@ -1088,13 +1167,14 @@ def prepare_phrases(config, language): import babel languages = pd.read_excel( - config['report_configuration'], sheet_name='languages', + config['reporting']['configuration'], sheet_name='languages', ) phrases = json.loads(languages.set_index('name').to_json())[language] city_details = config['reporting'] phrases['city'] = config['name'] phrases['city_name'] = city_details['languages'][language]['name'] phrases['country'] = city_details['languages'][language]['country'] + phrases['study_doi'] = 'https://healthysustainablecities.org' phrases['summary'] = city_details['languages'][language]['summary'] phrases['title_city'] = phrases['title_city'].format( city_name=phrases['city_name'], country=phrases['country'], @@ -1152,12 +1232,21 @@ def prepare_phrases(config, language): # handle city-specific exceptions language_exceptions = city_details['exceptions'] if (language_exceptions is not None) and (language in language_exceptions): - for e in language_exceptions: - phrases[e] = language_exceptions[e] + for e in language_exceptions[language]: + phrases[e] = language_exceptions[language][e] for citation in citations: if citation != 'citation_doi' or 'citation_doi' not in phrases: phrases[citation] = citations[citation].format(**phrases) phrases['citation_doi'] = phrases['citation_doi'].format(**phrases) + # Conditional draft marking if not flagged as publication ready + if config['reporting']['publication_ready']: + phrases['metadata_title2'] = '' + phrases['title_series_line2'] = '' + phrases['filename_publication_check'] = '' + else: + phrases['citation_doi'] = phrases['citation_doi'] + ' (DRAFT)' + phrases['title_city'] = phrases['title_city'] + ' (DRAFT)' + phrases['filename_publication_check'] = ' (DRAFT)' return phrases @@ -1187,7 +1276,9 @@ def wrap_sentences(words, limit=50, delimiter=''): def prepare_pdf_fonts(pdf, config, language): """Prepare PDF fonts.""" - fonts = 
pd.read_excel(config['report_configuration'], sheet_name='fonts') + fonts = pd.read_excel( + config['reporting']['configuration'], sheet_name='fonts', + ) fonts = ( fonts.loc[ fonts['Language'].isin( @@ -1216,15 +1307,15 @@ def prepare_pdf_fonts(pdf, config, language): ) -def save_pdf_layout(pdf, folder, template, filename): +def save_pdf_layout(pdf, folder, filename): """Save a PDF report in template subfolder in specified location.""" if not os.path.exists(folder): os.mkdir(folder) - template_folder = f'{folder}/_{template} reports' + template_folder = f'{folder}/reports' if not os.path.exists(template_folder): os.mkdir(template_folder) pdf.output(f'{template_folder}/{filename}') - return f' _{template} reports/{filename}'.replace('/home/ghsci/', '') + return f' reports/{filename}'.replace('/home/ghsci/', '') def generate_scorecard( @@ -1233,7 +1324,7 @@ def generate_scorecard( indicators, city_policy, language='English', - template='web', + template='policy_spatial', font=None, ): """ @@ -1241,17 +1332,12 @@ def generate_scorecard( Included in this function is the marking of a policy 'scorecard', with ticks, crosses, etc. """ + from ghsci import Region, date + + r = Region(config['codename']) + r.config = config locale = phrases['locale'] - # Set up PDF document template pages - if config['reporting']['publication_ready']: - phrases['metadata_title2'] = '' - phrases['title_series_line2'] = '' - phrases['filename_publication_check'] = '' - else: - phrases['citation_doi'] = phrases['citation_doi'] + ' (DRAFT)' - phrases['title_city'] = phrases['title_city'] + ' (DRAFT)' - phrases['filename_publication_check'] = ' (DRAFT)' - pages = pdf_template_setup(config, 'template_web', font, language) + pages = pdf_template_setup(config, template, font, language) pages = format_pages(pages, phrases) # initialise PDF pdf = FPDF(orientation='portrait', format='A4', unit='mm') @@ -1260,49 +1346,50 @@ def generate_scorecard( pdf.set_author(phrases['metadata_author']) pdf.set_title(f"{phrases['metadata_title1']} {phrases['metadata_title2']}") pdf.set_auto_page_break(False) - if template == 'web': - pdf = pdf_for_web( - pdf, - pages, - config, - language, - locale, - phrases, - indicators, - city_policy, - ) - elif template == 'print': - pdf = pdf_for_print( - pdf, - pages, - config, - language, - locale, - phrases, - indicators, - city_policy, - ) + pdf = generate_pdf( + pdf, + pages, + r, + template, + language, + locale, + phrases, + indicators, + city_policy, + ) # Output report pdf - filename = f"{phrases['city_name']} - {phrases['title_series_line1'].replace(':','')} - GHSCIC 2022 - {phrases['vernacular']}{phrases['filename_publication_check']}.pdf" + filename = f"GOHSC {date[:4]} - {template} report - {phrases['city_name']} - {phrases['vernacular']}{phrases['filename_publication_check']}.pdf" capture_result = save_pdf_layout( - pdf, folder=config['region_dir'], template=template, filename=filename, + pdf, folder=config['region_dir'], filename=filename, ) return capture_result -def pdf_for_web( - pdf, pages, config, language, locale, phrases, indicators, city_policy, +def generate_pdf( + pdf, + pages, + r, + report_template, + language, + locale, + phrases, + indicators, + city_policy, ): """ Generate a PDF based on a template for web distribution. This template includes reporting on both policy and spatial indicators. 
""" + config = r.config city_path = config['region_dir'] figure_path = f'{city_path}/figures' # Set up Cover page pdf.add_page() template = FlexTemplate(pdf, elements=pages['1']) + template['title_author'] = template['title_author'].format( + template=report_template.replace('_', '/'), + ) if os.path.exists( f'{config["folder_path"]}/process/configuration/assets/{phrases["Image 1 file"]}', ): @@ -1334,6 +1421,17 @@ def pdf_for_web( ] = f"{phrases['series_intro']}\n\n{phrases['series_interpretation']}".format( **phrases, ) + if report_template == 'spatial': + study_region_context_file = study_region_map( + r.get_engine(), + config, + urban_shading=True, + basemap='satellite', + arrow_colour='white', + scale_box=True, + file_name='study_region_boundary', + ) + template['study_region_context'] = study_region_context_file ## Access profile plot template['access_profile'] = f'{figure_path}/access_profile_{language}.jpg' ## Walkability plot @@ -1354,34 +1452,56 @@ def pdf_for_web( locale, ), ) - ## Policy ratings - template[ - 'presence_rating' - ] = f'{figure_path}/policy_presence_rating_{language}.jpg' - template[ - 'quality_rating' - ] = f'{figure_path}/policy_checklist_rating_{language}.jpg' - template['city_header'] = phrases['city_name'] - ## City planning requirement presence (round 0.5 up to 1) - policy_indicators = {0: '✗', 0.5: '~', 1: '✓'} - for x in range(1, 7): - # check presence - template[f'policy_urban_text{x}_response'] = policy_indicators[ - city_policy['Presence'][x - 1] + if 'policy' in report_template: + ## Policy ratings + template[ + 'presence_rating' + ] = f'{figure_path}/policy_presence_rating_{language}.jpg' + template[ + 'quality_rating' + ] = f'{figure_path}/policy_checklist_rating_{language}.jpg' + ## City planning requirement presence (round 0.5 up to 1) + template['city_header'] = phrases['city_name'] + policy_indicators = {0: '✗', 0.5: '~', 1: '✓'} + for x in range(1, 7): + # check presence + template[f'policy_urban_text{x}_response'] = policy_indicators[ + city_policy['Presence'][x - 1] + ] + # format percentage units according to locale + for gdp in ['middle', 'upper']: + template[f'policy_urban_text{x}_{gdp}'] = _pct( + float(city_policy['Presence_gdp'].iloc[x - 1][gdp]), + locale, + length='short', + ) + ## Walkable neighbourhood policy checklist + for i, policy in enumerate(city_policy['Checklist'].index): + row = i + 1 + for j, item in enumerate([x for x in city_policy['Checklist'][i]]): + col = j + 1 + template[ + f"policy_{'Checklist'}_text{row}_response{col}" + ] = item + elif 'spatial' in report_template: + context = config['reporting']['languages']['English']['context'] + keys = [ + ''.join(x) + for x in config['reporting']['languages']['English']['context'] ] - # format percentage units according to locale - for gdp in ['middle', 'upper']: - template[f'policy_urban_text{x}_{gdp}'] = _pct( - float(city_policy['Presence_gdp'].iloc[x - 1][gdp]), - locale, - length='short', + # blurb = '\n\n'.join([f"{k}\n{d[k][0]['summary'] if d[k][0]['summary'] is not None else 'None specified'}" for k,d in zip(keys,context)]) + blurb = [ + ( + k, + d[k][0]['summary'] + if d[k][0]['summary'] is not None + else 'None specified', ) - ## Walkable neighbourhood policy checklist - for i, policy in enumerate(city_policy['Checklist'].index): - row = i + 1 - for j, item in enumerate([x for x in city_policy['Checklist'][i]]): - col = j + 1 - template[f"policy_{'Checklist'}_text{row}_response{col}"] = item + for k, d in zip(keys, context) + ] + for i, item in enumerate(blurb): + 
template[f'region_context_header{i+1}'] = item[0] + template[f'region_context_text{i+1}'] = item[1] template.render() # Set up next page pdf.add_page() @@ -1429,14 +1549,19 @@ def pdf_for_web( template[ 'pct_access_500m_public_open_space_large_score' ] = f'{figure_path}/pct_access_500m_public_open_space_large_score_{language}.jpg' + if 'policy' in report_template: + ## Checklist ratings for PT and POS + for analysis in ['PT', 'POS']: + for i, policy in enumerate(city_policy[analysis].index): + row = i + 1 + for j, item in enumerate( + [x for x in city_policy[analysis][i]], + ): + col = j + 1 + template[ + f'policy_{analysis}_text{row}_response{col}' + ] = item template['city_text'] = phrases['summary'] - ## Checklist ratings for PT and POS - for analysis in ['PT', 'POS']: - for i, policy in enumerate(city_policy[analysis].index): - row = i + 1 - for j, item in enumerate([x for x in city_policy[analysis][i]]): - col = j + 1 - template[f'policy_{analysis}_text{row}_response{col}'] = item template.render() # Set up last page pdf.add_page() @@ -1534,15 +1659,17 @@ def choropleth_map( def add_color_bar(ax, data, cmap): + import matplotlib.axes as mpl_axes + # Create colorbar as a legend vmin, vmax = data.min(), data.max() # sm = plt.cm.ScalarMappable(cmap=’Blues’, norm=plt.Normalize(vmin=vmin, vmax=vmax)) divider = make_axes_locatable(ax) cax = divider.append_axes( - 'right', size='5%', pad=0.5, axes_class=mpl.axes.Axes, + 'right', size='5%', pad=0.5, axes_class=mpl_axes.Axes, ) - sm = mpl.pyplot.cm.ScalarMappable( - cmap=cmap, norm=mpl.pyplot.Normalize(vmin=vmin, vmax=vmax), + sm = plt.cm.ScalarMappable( + cmap=cmap, norm=plt.Normalize(vmin=vmin, vmax=vmax), ) # empty array for the data range sm._A = [] @@ -1568,9 +1695,8 @@ def study_region_map( ): """Plot study region boundary.""" import cartopy.crs as ccrs - import cartopy.io.img_tiles as cimgt import cartopy.io.ogc_clients as ogcc - from shapely.geometry import box + import matplotlib.patheffects as path_effects from subprocesses.batlow import batlow_map as cmap file_name = re.sub(r'\W+', '_', file_name) @@ -1581,20 +1707,13 @@ def study_region_map( ) return filepath else: - fontprops = mpl.font_manager.FontProperties(size=12) urban_study_region = gpd.GeoDataFrame.from_postgis( 'SELECT * FROM urban_study_region', engine, geom_col='geom', ).to_crs(epsg=3857) - bounding_box = box(*buffered_box(urban_study_region.total_bounds, 500)) - urban_buffer = gpd.GeoDataFrame( - gpd.GeoSeries(bounding_box), columns=['geometry'], crs=3857, - ) - clip_box = mpl.transforms.Bbox.from_extents(*urban_buffer.total_bounds) - xmin, ymin, xmax, ymax = urban_study_region.total_bounds # initialise figure - fig = mpl.pyplot.figure() + fig = plt.figure() ax = fig.add_subplot(1, 1, 1, projection=ccrs.epsg(3857)) - mpl.pyplot.axis('equal') + plt.axis('equal') # basemap helper codes ogcc.METERS_PER_UNIT['urn:ogc:def:crs:EPSG:6.3:3857'] = 1 ogcc._URN_TO_CRS[ @@ -1723,7 +1842,7 @@ def study_region_map( map_attribution, fontsize=7, path_effects=[ - mpl.patheffects.withStroke( + path_effects.withStroke( linewidth=2, foreground='w', alpha=0.5, ), ], @@ -1743,7 +1862,7 @@ def study_region_map( multiplier=1000, units='kilometer', locale=locale, - fontproperties=mpl.font_manager.FontProperties(size=textsize), + fontproperties=fm.FontProperties(size=textsize), loc='upper left', pad=0.2, color='black', @@ -1758,12 +1877,12 @@ def study_region_map( textcolor=arrow_colour, ) ax.set_axis_off() - mpl.pyplot.subplots_adjust( + plt.subplots_adjust( left=0, bottom=0.1, right=1, top=1, 
wspace=0, hspace=0, ) fig.savefig(filepath, dpi=dpi) + fig.clf() print(f' figures/{os.path.basename(filepath)}') - mpl.pyplot.close(fig) return filepath diff --git a/process/subprocesses/analysis_report.py b/process/subprocesses/analysis_report.py index 5b43e246..f3986c5b 100644 --- a/process/subprocesses/analysis_report.py +++ b/process/subprocesses/analysis_report.py @@ -14,122 +14,12 @@ ImageFile.LOAD_TRUNCATED_IMAGES = True -def format_date(date, format='%Y-%m-%d'): - """Format date as string.""" - from datetime import date as datetime_date - - if type(date) is datetime_date: - return date.strftime(format) - else: - return str(date) - - -def region_boundary_blurb_attribution( - name, study_region_boundary, urban_region, urban_query, -): - """Generate a blurb and attribution for the study region boundary.""" - sources = [] - if ( - study_region_boundary == 'urban_query' - or type(study_region_boundary) == dict - and 'data' in study_region_boundary - and study_region_boundary['data'] == 'urban_query' - ): - blurb_1 = f"The study region boundary was defined using an SQL query that was run using ogr2ogr to import the corresponding features from {urban_region['name']} to the database." - sources.append( - f"{urban_region['name']} under {urban_region['licence']}", - ) - else: - blurb_1 = f"The study region boundary was defined and imported to the database using ogr2ogr with data sourced from [{study_region_boundary['source']} ({format_date(study_region_boundary['publication_date'],'%Y')})]({study_region_boundary['url']})." - sources.append( - f"{study_region_boundary['source']} under {study_region_boundary['licence']}", - ) - if ( - 'ghsl_urban_intersection' in study_region_boundary - and study_region_boundary['ghsl_urban_intersection'] - ): - blurb_2 = f""" The urban portion of {name} was identified using the intersection of the study region boundary and urban regions sourced from {urban_region['name']} published as {urban_region['citation']}.""" - sources.append( - f"{urban_region['name']} under {urban_region['licence']}", - ) - else: - blurb_2 = f""" This study region boundary was taken to represent the {name} urban region.""" - if urban_query: - blurb_3 = f""" The SQL query used to extract urban areas from {urban_region['name']} was: {urban_query}.""" - else: - blurb_3 = '' - return { - 'blurb': blurb_1 + blurb_2 + blurb_3, - 'sources': set(sources), - } - - -def network_description(region_config): - blurbs = [] - blurbs.append( - f"""The [OSMnx](https://geoffboeing.com/2016/11/osmnx-python-street-networks/#) software package was used to derive an undirected [non-planar](https://geoffboeing.com/publications/osmnx-complex-street-networks/) pedestrian network of edges (lines) and nodes (vertices, or intersections) for the buffered study region area using the following custom definition: **{region_config['network']['pedestrian']}**. This definition was used to retrieve matching data via Overpass API for {region_config['OpenStreetMap']['publication_date']}.""", - ) - if region_config['network']['osmnx_retain_all']: - blurbs.append( - 'The network was extracted using OSMnx with the "retain_all" parameter set to __True__. This meant that all network segments were retained, including those that were not connected to the main network. 
This could mean that isolated network segments could be included, which could be problematic for evaluating accessibility if these are not truly disconnected in reality; this should be considered when reviewing results.', - ) - else: - blurbs.append( - 'The network was extracted using OSMnx with the "retain_all" parameter set to __False__. This meant that only the main connected network was retained. In many circumstances this is the appropriate setting, however please ensure this is appropriate for your study region, as networks on real islands may be excluded.', - ) - if region_config['network']['polygon_iteration']: - blurb = 'To account for multiple disconnected pedestrian networks within the study region (for example, as may occur in a city spanning several islands), the network was extracted iteratively for each polygon of the study region boundary multipolygon. This meant that the network was extracted for each polygon, and then the resulting networks were combined to form the final network.' - if type(region_config['network']['connection_threshold']) == int: - blurb = f"""{blurb}. Network islands were only included if meeting a minimum total network distance threshold set at {region_config['network']['connection_threshold']} metres. """ - blurbs.append(blurb) - blurbs.append( - f"""The OSMnx [consolidate_intersections()](https://osmnx.readthedocs.io/en/stable/osmnx.html#osmnx.simplification.consolidate_intersections) function was used to prepare a dataset of cleaned intersections with three or more legs, using a tolerance parameter of {region_config['network']['intersection_tolerance']} to consolidate network nodes within this distance as a single node. This ensures that intersections that exist for representational or connectivity purposes (for example a roundabout, that may be modelled with multiple nodes but in effect is a single intersections) do not inflate estimates when evaluating street connectivity for pedestrians.""", - ) - blurbs.append( - 'The derived pedestrian network nodes and edges, and the dataset of cleaned intersections were stored in the PostGIS database.', - ) - return ' '.join(blurbs) - - -def get_analysis_report_region_configuration(region_config, settings): - """Generate the region configuration for the analysis report.""" - region_config['OpenStreetMap'][ - 'note' - ] = f". 
The following note was recorded: __{region_config['OpenStreetMap']['note'] if 'note' in region_config['OpenStreetMap'] and region_config['OpenStreetMap']['note'] is not None else ''}__" - region_config['OpenStreetMap']['publication_date'] = datetime.strptime( - str(region_config['OpenStreetMap']['publication_date']), '%Y%m%d', - ).strftime('%d %B %Y') - region_config['study_buffer'] = settings['project']['study_buffer'] - region_config['study_region_blurb'] = region_boundary_blurb_attribution( - region_config['name'], - region_config['study_region_boundary'], - region_config['urban_region'], - region_config['urban_query'], - ) - region_config['network']['pedestrian'] = settings['network_analysis'][ - 'pedestrian' - ] - region_config['network']['description'] = network_description( - region_config, - ) - if 'data_type' in region_config['population'] and region_config[ - 'population' - ]['data_type'].startswith('vector'): - region_config[ - 'population_grid_setup' - ] = f'used the field "{region_config["population"]["vector_population_data_field"]}" to source estimates' - else: - region_config['population_grid_setup'] = ( - f'grid had a resolution of {region_config["population"]["resolution"]} m', - ) - return region_config - - def compile_analysis_report(engine, region_config, settings): """Compile the analysis report for the region.""" - region_config = get_analysis_report_region_configuration( - region_config, settings, - ) + openstreetmap_date = datetime.strptime( + str(region_config['OpenStreetMap']['publication_date']), '%Y%m%d', + ).strftime('%d %B %Y') + openstreetmap_note = f". The following note was recorded: __{region_config['OpenStreetMap']['note'] if 'note' in region_config['OpenStreetMap'] and region_config['OpenStreetMap']['note'] is not None else ''}__" # prepare images study_region_context_file = study_region_map( engine, region_config, @@ -163,7 +53,7 @@ def compile_analysis_report(engine, region_config, settings): 'markersize': None, }, }, - additional_attribution=f"""Pedestrian network edges: OpenStreetMap contributors ({region_config['OpenStreetMap']['publication_date']}), under {region_config['OpenStreetMap']['licence']}; network detail, including nodes and cleaned intersections can be explored using desktop mapping software like QGIS, using a connection to the {region_config['db']} database.""", + additional_attribution=f"""Pedestrian network edges: OpenStreetMap contributors ({openstreetmap_date}), under {region_config['OpenStreetMap']['licence']}; network detail, including nodes and cleaned intersections can be explored using desktop mapping software like QGIS, using a connection to the {region_config['db']} database.""", ) population_grid = study_region_map( engine, @@ -185,7 +75,16 @@ def compile_analysis_report(engine, region_config, settings): additional_attribution=f"""Population grid estimates: {region_config['population']['name']}, under {region_config['population']['licence']}.""", ) destination_plots = {} - for dest in df_osm_dest['dest_full_name'].unique(): + relevant_destinations = [ + 'Fresh Food / Market', + 'Convenience', + 'Public transport stop (any)', + ] + for dest in [ + x + for x in df_osm_dest['dest_full_name'].unique() + if x in relevant_destinations + ]: destination_plots[dest] = study_region_map( engine, region_config, @@ -204,7 +103,7 @@ def compile_analysis_report(engine, region_config, settings): 'where': f"""WHERE dest_name_full = '{dest}'""", }, }, - additional_attribution=f"""{dest} counts: OpenStreetMap contributors 
({region_config['OpenStreetMap']['publication_date']}), under {region_config['OpenStreetMap']['licence']}.""", + additional_attribution=f"""{dest} counts: OpenStreetMap contributors ({openstreetmap_date}), under {region_config['OpenStreetMap']['licence']}.""", ) # prepare tables osm_destination_definitions = df_osm_dest[ @@ -275,7 +174,9 @@ def compile_analysis_report(engine, region_config, settings): ), ( 'blurb', - 'OpenStreetMap data published {OpenStreetMap[publication_date]} were sourced from [{OpenStreetMap[source]} ({OpenStreetMap[publication_date]})]({OpenStreetMap[url]}){OpenStreetMap[note]} The buffered urban study region boundary was used to extract the region of interest from the source data using osmconvert and save this to the study region output folder. Features for this region were then imported to the PostGIS database using osm2pgsql, and with geometries updated to match the project coordinate reference system.'.format( + 'OpenStreetMap data published {openstreetmap_date} were sourced from [{OpenStreetMap[source]} ({openstreetmap_date})]({OpenStreetMap[url]}){openstreetmap_note} The buffered urban study region boundary was used to extract the region of interest from the source data using osmconvert and save this to the study region output folder. Features for this region were then imported to the PostGIS database using osm2pgsql, and with geometries updated to match the project coordinate reference system.'.format( + openstreetmap_date=openstreetmap_date, + openstreetmap_note=openstreetmap_note, **region_config, ), ), diff --git a/process/subprocesses/ghsci.py b/process/subprocesses/ghsci.py index 8e042166..1c2b55c3 100644 --- a/process/subprocesses/ghsci.py +++ b/process/subprocesses/ghsci.py @@ -96,56 +96,448 @@ def get_region_names() -> list: return region_names +def region_boundary_blurb_attribution( + name, study_region_boundary, urban_region, urban_query, +): + """Generate a blurb and attribution for the study region boundary.""" + sources = [] + if ( + study_region_boundary == 'urban_query' + or type(study_region_boundary) == dict + and 'data' in study_region_boundary + and study_region_boundary['data'] == 'urban_query' + ): + blurb_1 = f"The study region boundary was defined using an SQL query that was run using ogr2ogr to import the corresponding features from {urban_region['name']} to the database." + sources.append( + f"{urban_region['name']} under {urban_region['licence']}", + ) + else: + blurb_1 = f"The study region boundary was defined and imported to the database using ogr2ogr with data sourced from [{study_region_boundary['source']} ({format_date(study_region_boundary['publication_date'],'%Y')})]({study_region_boundary['url']})." 
+ sources.append( + f"{study_region_boundary['source']} under {study_region_boundary['licence']}", + ) + if ( + 'ghsl_urban_intersection' in study_region_boundary + and study_region_boundary['ghsl_urban_intersection'] + ): + blurb_2 = f""" The urban portion of {name} was identified using the intersection of the study region boundary and urban regions sourced from {urban_region['name']} published as {urban_region['citation']}.""" + sources.append( + f"{urban_region['name']} under {urban_region['licence']}", + ) + else: + blurb_2 = f""" This study region boundary was taken to represent the {name} urban region.""" + if urban_query: + blurb_3 = f""" The SQL query used to extract urban areas from {urban_region['name']} was: {urban_query}.""" + else: + blurb_3 = '' + return { + 'blurb': blurb_1 + blurb_2 + blurb_3, + 'sources': set(sources), + } + + + def network_description(region_config): + blurbs = [] + blurbs.append( + f"""The [OSMnx](https://geoffboeing.com/2016/11/osmnx-python-street-networks/#) software package was used to derive an undirected [non-planar](https://geoffboeing.com/publications/osmnx-complex-street-networks/) pedestrian network of edges (lines) and nodes (vertices, or intersections) for the buffered study region area using the following custom definition: **{region_config['network']['pedestrian']}**. This definition was used to retrieve matching data via Overpass API for {region_config['OpenStreetMap']['publication_date']}.""", + ) + if region_config['network']['osmnx_retain_all']: + blurbs.append( + 'The network was extracted using OSMnx with the "retain_all" parameter set to __True__. This meant that all network segments were retained, including those that were not connected to the main network. This could mean that isolated network segments could be included, which could be problematic for evaluating accessibility if these are not truly disconnected in reality; this should be considered when reviewing results.', + ) + else: + blurbs.append( + 'The network was extracted using OSMnx with the "retain_all" parameter set to __False__. This meant that only the main connected network was retained. In many circumstances this is the appropriate setting; however, please ensure this is appropriate for your study region, as networks on real islands may be excluded.', + ) + if region_config['network']['polygon_iteration']: + blurb = 'To account for multiple disconnected pedestrian networks within the study region (for example, as may occur in a city spanning several islands), the network was extracted iteratively for each polygon of the study region boundary multipolygon. This meant that the network was extracted for each polygon, and then the resulting networks were combined to form the final network.' + if type(region_config['network']['connection_threshold']) == int: + blurb = f"""{blurb} Network islands were only included if meeting a minimum total network distance threshold set at {region_config['network']['connection_threshold']} metres. """ + blurbs.append(blurb) + blurbs.append( + f"""The OSMnx [consolidate_intersections()](https://osmnx.readthedocs.io/en/stable/osmnx.html#osmnx.simplification.consolidate_intersections) function was used to prepare a dataset of cleaned intersections with three or more legs, using a tolerance parameter of {region_config['network']['intersection_tolerance']} to consolidate network nodes within this distance as a single node. 
This ensures that intersections that exist for representational or connectivity purposes (for example a roundabout, that may be modelled with multiple nodes but in effect is a single intersection) do not inflate estimates when evaluating street connectivity for pedestrians.""", + ) + blurbs.append( + 'The derived pedestrian network nodes and edges, and the dataset of cleaned intersections were stored in the PostGIS database.', + ) + return ' '.join(blurbs) + + + def get_analysis_report_region_configuration(region_config, settings): + """Generate the region configuration for the analysis report.""" + region_config['study_buffer'] = settings['project']['study_buffer'] + region_config['study_region_blurb'] = region_boundary_blurb_attribution( + region_config['name'], + region_config['study_region_boundary'], + region_config['urban_region'], + region_config['urban_query'], + ) + region_config['network']['pedestrian'] = settings['network_analysis'][ + 'pedestrian' + ] + region_config['network']['description'] = network_description( + region_config, + ) + if 'data_type' in region_config['population'] and region_config[ + 'population' + ]['data_type'].startswith('vector'): + region_config[ + 'population_grid_setup' + ] = f'used the field "{region_config["population"]["vector_population_data_field"]}" to source estimates' + else: + region_config['population_grid_setup'] = ( + f'grid had a resolution of {region_config["population"]["resolution"]} m' + ) + return region_config + + + def format_date(date, format='%Y-%m-%d'): + """Format date as string.""" + from datetime import date as datetime_date + + if type(date) is datetime_date: + return date.strftime(format) + else: + return str(date) + + class Region: """A class for a study region (e.g. a city) that is used to load and store parameters contained in a yaml configuration file in the configuration/regions folder.""" def __init__(self, name): self.codename = name self.config = load_yaml(f'{config_path}/regions/{name}.yml') + self._check_required_configuration_parameters() self.name = self.config['name'] - self.config = self.region_dictionary_setup( - self.codename, self.config, folder_path, - ) + self.config = self._region_dictionary_setup(folder_path) + self._run_data_checks() self.engine = self.get_engine() self.tables = self.get_tables() + self.log = f"{self.config['region_dir']}/__{self.name}__{self.codename}_processing_log.txt" self.header = f"\n{self.name} ({self.codename})\n\nOutput directory:\n {self.config['region_dir'].replace('/home/ghsci/','')}\n" - def analysis(self): - """Run analysis for this study region.""" - from analysis import analysis as run_analysis - - run_analysis(self) - - def generate(self): - """Generate analysis outputs for this study region.""" - from generate import generate as generate_resources - - generate_resources(self) - - def compare(self, comparison): - """Compare analysis outputs for this study region with those of another.""" - from compare import compare as compare_resources - - comparison = compare_resources(self, comparison) - return comparison + def _check_required_configuration_parameters( + self, required=['name', 'year', 'country'], + ): + """Check required parameters are configured.""" + for key in required: + if key not in self.config or self.config[key] is None: + sys.exit( + f'\nThe required parameter "{key}" has not yet been configured in {self.codename}.yml. 
Please check the configured settings before proceeding.\n', + ) - def drop(self, table=''): - """Attempt to drop database results for this study region.""" - if table == '': - from _drop_study_region_database import ( - drop_study_region_database as drop_resources, - ) + def _region_dictionary_setup(self, folder_path): + """Set up region configuration dictionary.""" + codename = self.codename + r = self.config.copy() + r['authors'] = settings['documentation']['authors'] + study_buffer = settings['project']['study_buffer'] + units = settings['project']['units'] + buffered_urban_study_region = ( + f'urban_study_region_{study_buffer}{units}' + ) + r['crs_srid'] = f"{r['crs']['standard']}:{r['crs']['srid']}" + data_path = f'{folder_path}/process/data' + r[ + 'region_dir' + ] = f'{folder_path}/process/data/_study_region_outputs/{codename}' + if r['study_region_boundary']['data'] != 'urban_query': + r['study_region_boundary'][ + 'data' + ] = f"{data_path}/{r['study_region_boundary']['data']}" + r['urban_region'] = self._region_data_setup( + codename, r, 'urban_region', data_path, + ) + r['buffered_urban_study_region'] = buffered_urban_study_region + r['db'] = codename.lower() + r[ + 'dbComment' + ] = f'Liveability indicator data for {codename} {r["year"]}.' + r['db_host'] = settings['sql']['db_host'] + r['db_port'] = settings['sql']['db_port'] + r['db_user'] = settings['sql']['db_user'] + r['db_pwd'] = settings['sql']['db_pwd'] + r['population'] = self._region_data_setup( + codename, r, 'population', data_path, + ) + r['population_grid_field'] = 'pop_est' + if r['population']['data_type'].startswith('raster'): + resolution = f"{r['population']['resolution'].replace(' ', '')}_{r['population']['year_target']}".lower() + elif r['population']['data_type'].startswith('vector'): + resolution = f"{r['population']['alias']}_{r['population']['vector_population_data_field']}".lower() + r[ + 'population_grid_field' + ] = f"pop_est_{r['population']['vector_population_data_field'].lower()}" + r['population_grid'] = f'population_{resolution}'.lower() + if 'population_denominator' not in r['population']: + r['population']['population_denominator'] = r[ + 'population_grid_field' + ].lower() + else: + r['population']['population_denominator'] = r['population'][ + 'population_denominator' + ].lower() + r['population'][ + 'crs_srid' + ] = f'{r["population"]["crs_standard"]}:{r["population"]["crs_srid"]}' + r['OpenStreetMap'] = self._region_data_setup( + codename, r, 'OpenStreetMap', data_path, + ) + r['osm_prefix'] = f"osm_{r['OpenStreetMap']['publication_date']}" + r['OpenStreetMap'][ + 'osm_region' + ] = f'{r["region_dir"]}/{codename}_{r["osm_prefix"]}.pbf' + r['codename_poly'] = f'{r["region_dir"]}/poly_{r["db"]}.poly' + if 'osmnx_retain_all' not in r['network']: + r['network']['osmnx_retain_all'] = False + if 'buffered_region' not in r['network']: + r['network']['buffered_region'] = True + if 'polygon_iteration' not in r['network']: + r['network']['polygon_iteration'] = False + if 'connection_threshold' not in r['network']: + r['network']['connection_threshold'] = None + if ( + 'intersections' in r['network'] + and r['network']['intersections'] is not None + ): + intersections = os.path.splitext( + os.path.basename(r['network']['intersections']['data']), + )[0] + r['intersections_table'] = f'intersections_{intersections}' + else: + r[ + 'intersections_table' + ] = f"intersections_osmnx_{r['network']['intersection_tolerance']}m"
+ r['gpkg'] = f'{r["region_dir"]}/{codename}_{study_buffer}m_buffer.gpkg' + r['point_summary'] = 'indicators_sample_points' + r['grid_summary'] = f'indicators_{resolution}' + r['city_summary'] = 'indicators_region' + if 'custom_aggregations' not in r: + r['custom_aggregations'] = {} + # backwards compatibility with old templates + if 'country_gdp' in r and r['country_gdp'] is not None: + if 'reference' in r['country_gdp']: + r['country_gdp']['citation'] = r['country_gdp'].pop( + 'reference', None, + ) + if 'custom_destinations' in r and r['custom_destinations'] is not None: + if 'attribution' in r['custom_destinations']: + r['custom_destinations']['citation'] = r[ + 'custom_destinations' + ].pop('attribution', None) + if ( + 'policy_review' in r + and r['policy_review'] is not None + and r['policy_review'].endswith('.xlsx') + ): + r['policy_review'] = f"{folder_path}/{r['policy_review']}" + else: + # for now, we'll insert the blank template to allow the report to be generated + r[ + 'policy_review' + ] = f'{folder_path}/process/data/policy_review/_policy_review_template_v0_TO-BE-UPDATED.xlsx' + r = get_analysis_report_region_configuration(r, settings) + return r - drop_resources(self) + def _verify_data_dir(self, data_dir, verify_file_extension=None) -> dict: + """Return whether the supplied data directory exists, optionally checking for the existence of at least one file matching a specific extension within that directory.""" + if verify_file_extension is None: + return { + 'data': data_dir, + 'exists': os.path.exists(data_dir), + } + # If False: f'The configured file in datasets.yml could not be located at {data_dir}. Please check file and configuration of datasets.yml.', + else: + if os.path.isfile(data_dir): + return { + 'data': data_dir, + 'exists': True, + } + else: + check = any( + File.endswith(verify_file_extension) + for File in os.listdir(data_dir) + ) + return { + 'data': data_dir, + 'exists': f'{check} ({verify_file_extension})', + } + + # Set up region data + def _region_data_setup( + self, region, region_config, data, data_path=None, + ): + """Check data configuration for regions and make paths absolute.""" + try: + if type(region_config[data]) == str: + if data not in datasets or datasets[data] is None: + sys.exit( + f'\nAn entry for at least one {data} dataset does not appear to have been defined in datasets.yml. This parameter is required for analysis, and is used to cross-reference a relevant dataset defined in datasets.yml with region configuration in {region}.yml. Please update datasets.yml to proceed.\n', + ) + elif region_config[data] is None: + sys.exit( + f'\nThe entry for {data} does not appear to have been defined in {region}.yml. This parameter is required for analysis, and is used to cross-reference a relevant dataset defined in datasets.yml. Please update {region}.yml to proceed.\n', + ) + elif datasets[data][region_config[data]] is None: + sys.exit( + f'\nThe configured entry for {region_config[data]} under {data} within datasets.yml does not appear to be associated with any values. Please check and amend the specification for this entry within datasets.yml, or the configuration within {region}.yml to proceed. 
(is this entry and its records indented as per the provided example?)\n', + ) + data_dictionary = datasets[data][region_config[data]].copy() + else: + if data == 'urban_region' and ( + data not in region_config or region_config[data] is None + ): + urban_region_checks = [ + self.config['study_region_boundary'][ + 'ghsl_urban_intersection' + ], + 'covariate_data' in self.config + and self.config['covariate_data'] == 'urban_query', + ] + if any(urban_region_checks): + data_dictionary = {'data_dir': None, 'citation': ''} + else: + # print( + # f'Configuration for {data} not found in configuration file; skipping...', + # ) + data_dictionary = { + 'data_dir': 'Not required (neither urban region intersection nor covariates referenced)', + 'citation': '', + } + else: + data_dictionary = region_config[data].copy() + if 'citation' not in data_dictionary: + if data != 'OpenStreetMap': + sys.exit( + f'\nNo citation record has been configured for the {data} dataset configured for this region. Please add this to its record in datasets.yml (see template datasets.yml for examples).\n', + ) + elif 'source' not in data_dictionary: + data_dictionary[ + 'citation' + ] = f'OpenStreetMap Contributors ({str(data_dictionary["publication_date"])[:4]}). {data_dictionary["url"]}' + else: + data_dictionary[ + 'citation' + ] = f'OpenStreetMap Contributors. {data_dictionary["source"]} ({str(data_dictionary["publication_date"])[:4]}). {data_dictionary["url"]}' + if ('data_dir' not in data_dictionary) or ( + data_dictionary['data_dir'] is None + ): + sys.exit( + f"The 'data_dir' entry for {data} does not appear to have been defined. This parameter is required for analysis of {region}, and is used to locate a required dataset cross-referenced in {region}.yml. Please check the configured settings before proceeding.", + ) + if data_path is not None: + data_dictionary[ + 'data_dir' + ] = f"{data_path}/{data_dictionary['data_dir']}" + return data_dictionary + except Exception as e: + sys.exit(e) + + def _run_data_checks(self): + """Check configured data exists for this specified region.""" + checks = [] + failures = [] + data_check_report = '\nOne or more required resources were not located in the configured paths; please check your configuration for any items marked "False":\n' + self.config['study_region_boundary'][ + 'ghsl_urban_intersection' + ] = self.config['study_region_boundary'].pop( + 'ghsl_urban_intersection', False, + ) + urban_region_checks = [ + self.config['study_region_boundary']['ghsl_urban_intersection'], + 'covariate_data' in self.config + and self.config['covariate_data'] == 'urban_query', + ] + if ( + 'urban_region' in self.config + and self.config['urban_region'] is not None + ) and (urban_region_checks[0] or urban_region_checks[1]): + checks.append( + self._verify_data_dir( + self.config['urban_region']['data_dir'], + verify_file_extension=None, + ), + ) + elif urban_region_checks[0]: + checks.append( + { + 'data': "Urban region not configured, but required when 'ghsl_urban_intersection' is set to True", + 'exists': False, + }, + ) + elif urban_region_checks[1]: + checks.append( + { + 'data': "Urban region not configured, but required when 'covariate_data' set to 'urban_query'", + 'exists': False, + }, + ) + checks.append( + self._verify_data_dir( + self.config['OpenStreetMap']['data_dir'], + verify_file_extension=None, + ), + ) + checks.append( + self._verify_data_dir( + self.config['population']['data_dir'], + verify_file_extension='tif', + ), + ) + if self.config['study_region_boundary']['data'] != 'urban_query':
+ checks.append( + self._verify_data_dir( + self.config['study_region_boundary']['data'].split(':')[0], + ), + ) + for check in checks: + data_check_report += f"\n{check['exists']}: {check['data']}".replace( + folder_path, '...', + ) + if not check['exists']: + failures.append(check) + data_check_report += '\n' + if len(failures) > 0: + sys.exit(data_check_report) + + def analysis(self): + """Run analysis for this study region.""" + from analysis import analysis as run_analysis + + run_analysis(self) + + def generate(self): + """Generate analysis outputs for this study region.""" + from generate import generate as generate_resources + + generate_resources(self) + + def compare(self, comparison): + """Compare analysis outputs for this study region with those of another.""" + from compare import compare as compare_resources + + comparison = compare_resources(self, comparison) + return comparison + + def drop(self, table=''): + """Attempt to drop database results for this study region.""" + if table == '': + from _drop_study_region_database import ( + drop_study_region_database as drop_resources, + ) + + drop_resources(self) + else: + with self.engine.begin() as connection: + try: + print(f'Dropping table {table}...') + connection.execute( + text(f"""DROP TABLE IF EXISTS {table};"""), + ) + except Exception as e: + print(f'Error: {e}') def _create_database(self): """Create database for this study region.""" @@ -586,293 +978,6 @@ def to_csv(self, table, file, drop=['geom'], index=False): df.to_csv(file, index=index) return file - def run_data_checks(self): - """Check configured data exists for this specified region.""" - checks = [] - failures = [] - data_check_report = '\nOne or more required resources were not located in the configured paths; please check your configuration for any items marked "False":\n' - self.config['study_region_boundary'][ - 'ghsl_urban_intersection' - ] = self.config['study_region_boundary'].pop( - 'ghsl_urban_intersection', False, - ) - urban_region_checks = [ - self.config['study_region_boundary']['ghsl_urban_intersection'], - 'covariate_data' in self.config - and self.config['covariate_data'] == 'urban_query', - ] - if ( - 'urban_region' in self.config - and self.config['urban_region'] is not None - ) and (urban_region_checks[0] or urban_region_checks[1]): - checks.append( - self.verify_data_dir( - self.config['urban_region']['data_dir'], - verify_file_extension=None, - ), - ) - # urban_query_check = 'urban_query' in self.config and self.config['urban_query'] is not None - # checks.append({ - # 'data': ["urban_query has not been configured, but urban region is referenced elsewhere in configuration","urban_query is configured"][urban_query_check], - # 'exists': urban_query_check, - # }) - elif urban_region_checks[0]: - checks.append( - { - 'data': "Urban region not configured, but required when 'ghsl_urban_intersection' is set to True", - 'exists': False, - }, - ) - elif urban_region_checks[1]: - checks.append( - { - 'data': "Urban region not configured, but required when 'covariate_data' set to 'urban_query'", - 'exists': False, - }, - ) - checks.append( - self.verify_data_dir( - self.config['OpenStreetMap']['data_dir'], - verify_file_extension=None, - ), - ) - checks.append( - self.verify_data_dir( - self.config['population']['data_dir'], - verify_file_extension='tif', - ), - ) - if self.config['study_region_boundary']['data'] != 'urban_query': - checks.append( - self.verify_data_dir( - self.config['study_region_boundary']['data'].split(':')[0], 
), - ) - for check in checks: - data_check_report += f"\n{check['exists']}: {check['data']}".replace( - folder_path, '...', - ) - if not check['exists']: - failures.append(check) - data_check_report += '\n' - if len(failures) > 0: - sys.exit(data_check_report) - - def verify_data_dir(self, data_dir, verify_file_extension=None) -> dict: - """Return true if supplied data directory exists, optionally checking for existance of at least one file matching a specific extension within that directory.""" - if verify_file_extension is None: - return { - 'data': data_dir, - 'exists': os.path.exists(data_dir), - } - # If False: f'The configured file in datasets.yml could not be located at {data_dir}. Please check file and configuration of datasets.yml.', - else: - if os.path.isfile(data_dir): - return { - 'data': data_dir, - 'exists': True, - } - else: - check = any( - File.endswith(verify_file_extension) - for File in os.listdir(data_dir) - ) - return { - 'data': data_dir, - 'exists': f'{check} ({verify_file_extension})', - } - - # Set up region data - def region_data_setup( - self, region, region_config, data, data_path=None, - ): - """Check data configuration for regions and make paths absolute.""" - try: - if type(region_config[data]) == str: - if data not in datasets or datasets[data] is None: - sys.exit( - f'\nAn entry for at least one {data} dataset does not appear to have been defined in datasets.yml. This parameter is required for analysis, and is used to cross-reference a relevant dataset defined in datasets.yml with region configuration in {region}.yml. Please update datasets.yml to proceed.\n', - ) - elif region_config[data] is None: - sys.exit( - f'\nThe entry for {data} does not appear to have been defined in {region}.yml. This parameter is required for analysis, and is used to cross-reference a relevant dataset defined in datasets.yml. Please update {region}.yml to proceed.\n', - ) - elif datasets[data][region_config[data]] is None: - sys.exit( - f'\nThe configured entry for {region_config[data]} under {data} within datasets.yml does not appear to be associated within any values. Please check and amend the specification for this entry within datasets.yml , or the configuration within {region}.yml to proceed. (is this entry and its records indented as per the provided example?)\n', - ) - data_dictionary = datasets[data][region_config[data]].copy() - else: - if data == 'urban_region' and ( - data not in region_config or region_config[data] is None - ): - urban_region_checks = [ - self.config['study_region_boundary'][ - 'ghsl_urban_intersection' - ], - 'covariate_data' in self.config - and self.config['covariate_data'] == 'urban_query', - ] - if any(urban_region_checks): - data_dictionary = {'data_dir': None, 'citation': ''} - else: - # print( - # f'Configuration for {data} not found in configuration file; skipping...', - # ) - data_dictionary = { - 'data_dir': 'Not required (neither urban region intersection or covariates referenced)', - 'citation': '', - } - else: - data_dictionary = region_config[data].copy() - if 'citation' not in data_dictionary: - if data != 'OpenStreetMap': - sys.exit( - f'\nNo citation record has been configured for the {data} dataset configured for this region. Please add this to its record in datasets.yml (see template datasets.yml for examples).\n', - ) - elif 'source' not in data_dictionary: - data_dictionary[ - 'citation' - ] = f'OpenStreetMap Contributors ({str(data_dictionary["publication_date"])[:4]}). 
{data_dictionary["url"]}' - else: - data_dictionary[ - 'citation' - ] = f'OpenStreetMap Contributors. {data_dictionary["source"]} ({str(data_dictionary["publication_date"])[:4]}). {data_dictionary["url"]}' - if ('data_dir' not in data_dictionary) or ( - data_dictionary['data_dir'] is None - ): - sys.exit( - f"The 'data_dir' entry for {data} does not appear to have been defined. This parameter is required for analysis of {region}, and is used to locate a required dataset cross-referenced in {region}.yml. Please check the configured settings before proceeding.", - ) - if data_path is not None: - data_dictionary[ - 'data_dir' - ] = f"{data_path}/{data_dictionary['data_dir']}" - return data_dictionary - except Exception as e: - sys.exit(e) - - def region_dictionary_setup(self, codename, region_config, folder_path): - """Set up region configuration dictionary.""" - r = region_config.copy() - for key in ['name', 'year', 'country']: - if key not in r or r[key] is None: - sys.exit( - f'\nThe required parameter "{key}" has not yet been configured in {codename}.yml. Please check the configured settings before proceeding.\n', - ) - study_buffer = settings['project']['study_buffer'] - units = settings['project']['units'] - buffered_urban_study_region = ( - f'urban_study_region_{study_buffer}{units}' - ) - r['crs_srid'] = f"{r['crs']['standard']}:{r['crs']['srid']}" - data_path = f'{folder_path}/process/data' - r[ - 'region_dir' - ] = f'{folder_path}/process/data/_study_region_outputs/{codename}' - if r['study_region_boundary']['data'] != 'urban_query': - r['study_region_boundary'][ - 'data' - ] = f"{data_path}/{r['study_region_boundary']['data']}" - r['urban_region'] = self.region_data_setup( - codename, region_config, 'urban_region', data_path, - ) - r['buffered_urban_study_region'] = buffered_urban_study_region - r['db'] = codename.lower() - r[ - 'dbComment' - ] = f'Liveability indicator data for {codename} {r["year"]}.' 
- r['db_host'] = settings['sql']['db_host'] - r['db_port'] = settings['sql']['db_port'] - r['db_user'] = settings['sql']['db_user'] - r['db_pwd'] = settings['sql']['db_pwd'] - r['population'] = self.region_data_setup( - codename, region_config, 'population', data_path, - ) - r['population_grid_field'] = 'pop_est' - if r['population']['data_type'].startswith('raster'): - resolution = f"{r['population']['resolution'].replace(' ', '')}_{r['population']['year_target']}".lower() - elif r['population']['data_type'].startswith('vector'): - resolution = f"{r['population']['alias']}_{r['population']['vector_population_data_field']}".lower() - r['population_grid_field'] = ( - 'pop_est_' - + r['population']['vector_population_data_field'].lower() - ) - r['population_grid'] = f'population_{resolution}'.lower() - if 'population_denominator' not in r['population']: - r['population']['population_denominator'] = r[ - 'population_grid_field' - ].lower() - else: - r['population']['population_denominator'] = r['population'][ - 'population_denominator' - ].lower() - r['population'][ - 'crs_srid' - ] = f'{r["population"]["crs_standard"]}:{r["population"]["crs_srid"]}' - r['OpenStreetMap'] = self.region_data_setup( - codename, region_config, 'OpenStreetMap', data_path, - ) - r['osm_prefix'] = f"osm_{r['OpenStreetMap']['publication_date']}" - r['OpenStreetMap'] = self.region_data_setup( - codename, region_config, 'OpenStreetMap', data_path, - ) - r['OpenStreetMap'][ - 'osm_region' - ] = f'{r["region_dir"]}/{codename}_{r["osm_prefix"]}.pbf' - r['codename_poly'] = f'{r["region_dir"]}/poly_{r["db"]}.poly' - if 'osmnx_retain_all' not in r['network']: - r['network']['osmnx_retain_all'] = False - if 'osmnx_retain_all' not in r['network']: - r['network']['osmnx_retain_all'] = False - if 'buffered_region' not in r['network']: - r['network']['buffered_region'] = True - if 'polygon_iteration' not in r['network']: - r['network']['polygon_iteration'] = False - if 'connection_threshold' not in r['network']: - r['network']['connection_threshold'] = None - if ( - 'intersections' in r['network'] - and r['network']['intersections'] is not None - ): - intersections = os.path.splitext( - os.path.basename(r['network']['intersections']['data']), - )[0] - r['intersections_table'] = f'intersections_{intersections}' - else: - r[ - 'intersections_table' - ] = f"intersections_osmnx_{r['network']['intersection_tolerance']}m" - r['gpkg'] = f'{r["region_dir"]}/{codename}_{study_buffer}m_buffer.gpkg' - r['point_summary'] = 'indicators_sample_points' - r['grid_summary'] = f'indicators_{resolution}' - r['city_summary'] = 'indicators_region' - if 'custom_aggregations' not in r: - r['custom_aggregations'] = {} - # backwards compatibility with old templates - if 'country_gdp' in r and r['country_gdp'] is not None: - if 'reference' in r['country_gdp']: - r['country_gdp']['citation'] = r['country_gdp'].pop( - 'reference', None, - ) - if 'custom_destinations' in r and r['custom_destinations'] is not None: - if 'attribution' in r['custom_destinations']: - r['custom_destinations']['citation'] = r[ - 'custom_destinations' - ].pop('attribution', None) - if ( - 'policy_review' in r - and r['policy_review'] is not None - and r['policy_review'].endswith('.xlsx') - ): - r['policy_review'] = f"{folder_path}/{r['policy_review']}" - else: - # for now, we'll insert the blank template to allow the report to be generated - r[ - 'policy_review' - ] = f'{folder_path}/process/data/policy_review/_policy_review_template_v0_TO-BE-UPDATED.xlsx' - return r - # Allow for 
project setup to run from different directories; potentially outside docker # This means project configuration and setup can be verified in externally launched tests
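Taken together, the ghsci.py changes fold validation into the Region constructor: required-parameter checks, dictionary setup and data checks now run at instantiation rather than as separate public calls. Below is a hypothetical usage sketch under that assumption; the codename 'example_city' is illustrative and stands in for any region configured in configuration/regions.

# assumes the ghsci module is importable and 'example_city.yml' exists in configuration/regions
from subprocesses.ghsci import Region

r = Region('example_city')  # loads example_city.yml; config, path and data checks run in __init__
r.analysis()                # run the indicator analysis for this study region
r.generate()                # generate reports and other outputs
r.drop(table='indicators_region')  # optionally drop a single results table from the database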