diff --git a/constants.py b/constants.py index c56be4e..912e33c 100644 --- a/constants.py +++ b/constants.py @@ -152,6 +152,7 @@ paths_dict["other_footways_subcategories"][subcategory] = subcategory_path paths_dict["map_layers"][subcategory] = subcategory_path +versioning_dict = paths_dict["versioning"] # max radius to cut off unconnected crossings and kerbs max_radius_cutoff = 50 @@ -583,3 +584,10 @@ # defined here to avoid circular importing problems def get_url(relative_url, base_url=node_homepage_url): return os.path.join(base_url, relative_url) + + +# to fill in default values for dates: +default_missing_day = 9 +default_missing_month = 8 +default_missing_year = 2004 +# OSM's foundation date :-) diff --git a/dashboard/statistics_funcs.py b/dashboard/statistics_funcs.py index 4fdd729..48ed33a 100644 --- a/dashboard/statistics_funcs.py +++ b/dashboard/statistics_funcs.py @@ -23,9 +23,9 @@ def get_count_df(input_df, fieldname, str_to_append=" type"): input_df[fieldname] .value_counts() .reset_index() - .rename(columns={"index": outfieldname, fieldname: "count"}) .sort_values(by="count", ascending=False), outfieldname, + # .rename(columns={"index": outfieldname, fieldname: "count"}) ) @@ -33,7 +33,7 @@ def create_barchart( input_df, fieldname, title, - str_to_append=" type", + str_to_append="", title_fontsize=24, tooltip="count", x_sort="-y", @@ -161,7 +161,7 @@ def double_scatter_bar( width=600, height=350, ) - .add_selection(interval) + .add_params(interval) ) hist_base = ( @@ -189,4 +189,13 @@ def double_scatter_bar( return (scatter & hist).configure_title(fontSize=fontsize, align="center") -# 'Surface x Smoothness' +def create_rev_date(row): + try: + return datetime( + year=int(row["rev_year"]), + month=int(row["rev_month"]), + day=int(row["rev_day"]), + ) + except ValueError: + # Handle invalid dates, you can return None or a specific default date + return None diff --git a/dashboard/statistics_generation.py b/dashboard/statistics_generation.py index 
533f01d..dd0cbc7 100644 --- a/dashboard/statistics_generation.py +++ b/dashboard/statistics_generation.py @@ -2,43 +2,67 @@ # reading data: -exit(0) -for category in paths_dict["data"]: - if category != "other_footways": # TODO: remove this to include this category also - print("Adaptations for:", category) +for category in gdfs_dict: + # creating the folder if it does not exist + create_folder_if_not_exists(os.path.join("statistics", category)) - # creating additional fields + # creating a ref to improve readability + cat_gdf = gdfs_dict[category] + update_df = updating_dicts[category] - if geom_type_dict[category] == "LineString": - create_length_field(gdfs_dict[category]) - create_weblink_field(gdfs_dict[category]) - elif geom_type_dict[category] == "Point": - create_weblink_field(gdfs_dict[category], "Point") + print("Adaptations for:", category) - if "survey:date" in gdfs_dict[category].columns: + # creating additional fields - gdfs_dict[category]["Year of Survey"] = gdfs_dict[category][ - "survey:date" - ].apply(get_year_surveydate) + if "LineString" in geom_type_dict[category]: + create_length_field(cat_gdf) + create_weblink_field(cat_gdf) + elif "Point" in geom_type_dict[category]: + create_weblink_field(cat_gdf, "Point") - create_folder_if_not_exists(os.path.join("statistics", category)) + # uncertain about polygon cases + # elif (: + # geom_type_dict[category] == "Polygon" + # or geom_type_dict[category] == "MultiPolygon" + # ): + # create_weblink_field(gdfs_dict[category]) - # updating info: - updating_dicts[category]["month_year"] = ( - updating_dicts[category]["rev_month"].map("{:02d}".format) - + "_" - + updating_dicts[category]["rev_year"].astype(str) - ) + if "survey:date" in cat_gdf.columns: - updating_dicts[category]["year_month"] = ( - updating_dicts[category]["rev_year"].astype(str) - + "_" - + updating_dicts[category]["rev_month"].map("{:02d}".format) - ) + cat_gdf["Year of Survey"] = cat_gdf["survey:date"].apply(get_year_surveydate) - 
updating_dicts[category].sort_values("year_month", inplace=True) + # updating info: + update_df["month_year"] = ( + update_df["rev_month"].map("{:02d}".format) + + "_" + + update_df["rev_year"].astype(str) + ) + update_df["year_month"] = ( + update_df["rev_year"].astype(str) + + "_" + + update_df["rev_month"].map("{:02d}".format) + ) + + update_df.sort_values("year_month", inplace=True) + + # Fill missing date parts with the defaults from constants.py (OSM's foundation date: day 9, month 8, year 2004). NOTE(review): month_year/year_month above are built before this fill, so missing (NaN) months would already break the "{:02d}" formatting there - consider filling first. TODO: move to data adaptation script + update_df["rev_year"] = ( + update_df["rev_year"].fillna(default_missing_year).astype(int) + ) + update_df["rev_month"] = ( + update_df["rev_month"].fillna(default_missing_month).astype(int) + ) + update_df["rev_day"] = update_df["rev_day"].fillna(default_missing_day).astype(int) + + update_df["rev_date_obj"] = update_df.apply(create_rev_date, axis=1) + + update_df["age_years"] = ( + pd.Timestamp(datetime.today()) - update_df["rev_date_obj"] + ).dt.days / 365.25 + +# storing chart infos: generated_list_dict = {} charts_titles = {} @@ -79,6 +103,10 @@ print(generated_list_dict) for category in generated_list_dict: + if not generated_list_dict[category]: + print("no charts generated for: ", category) + continue + category_homepage = get_url(generated_list_dict[category][0]) topbar += f'{category.capitalize()} Charts\n' diff --git a/dashboard/statistics_specs.py b/dashboard/statistics_specs.py index 323352f..a8a167f 100644 --- a/dashboard/statistics_specs.py +++ b/dashboard/statistics_specs.py @@ -1,14 +1,13 @@ from statistics_funcs import * -gdfs_dict = {} +gdfs_dict = get_gdfs_dict() updating_dicts = {} for category in paths_dict["data"]: - gdfs_dict[category] = gpd.read_parquet(paths_dict["data"][category]) if os.path.exists(paths_dict["versioning"].get(category)): - updating_dicts[category] = pd.read_json(paths_dict["versioning"][category]) + updating_dicts[category] = pd.read_json(versioning_dict[category]) else: updating_dicts[category] = pd.DataFrame() @@ -87,15 +86,15 @@
), "title": "Incline Values", }, - "sidewalks_survey_year": { - "function": create_barchart, - "params": ( - gdfs_dict["sidewalks"], - "Year of Survey", - "Year of Survey Image (sidewalks)", - ), - "title": "Year of Survey Image", - }, + # "sidewalks_survey_year": { + # "function": create_barchart, + # "params": ( + # gdfs_dict["sidewalks"], + # "Year of Survey", + # "Year of Survey Image (sidewalks)", + # ), + # "title": "Year of Survey Image", + # }, "sidewalks_yr_moth_update": { "function": create_barchart, "params": ( @@ -159,15 +158,15 @@ ), "title": "Surface x Smoothness", }, - "crossings_survey_year": { - "function": create_barchart, - "params": ( - gdfs_dict["crossings"], - "Year of Survey", - "Year of Survey Image (crossings)", - ), - "title": "Year of Survey Image", - }, + # "crossings_survey_year": { + # "function": create_barchart, + # "params": ( + # gdfs_dict["crossings"], + # "Year of Survey", + # "Year of Survey Image (crossings)", + # ), + # "title": "Year of Survey Image", + # }, "crossings_yr_moth_update": { "function": create_barchart, "params": ( @@ -240,15 +239,15 @@ ), "title": "Wheelchair Acessibility", }, - "kerbs_survey_year": { - "function": create_barchart, - "params": ( - gdfs_dict["kerbs"], - "Year of Survey", - "Year of Survey Image (kerbs)", - ), - "title": "Year of Survey Image", - }, + # "kerbs_survey_year": { + # "function": create_barchart, + # "params": ( + # gdfs_dict["kerbs"], + # "Year of Survey", + # "Year of Survey Image (kerbs)", + # ), + # "title": "Year of Survey Image", + # }, "kerbs_yr_moth_update": { "function": create_barchart, "params": ( @@ -276,6 +275,60 @@ "title": "Number Of Revisions", }, }, + "other_footways": { + "other_footways_surface": { + "function": create_barchart, + "params": ( + updating_dicts["other_footways"], + "surface", + "other_footways Surface", + ), + "title": "Other Footways Surface", + }, + "other_footways_smoothness_x_surface": { + "function": double_scatter_bar, + "params": ( + 
gdfs_dict["other_footways"], + "Surface x Smoothness (other_footways)", + "surface", + "smoothness", + None, + "count()", + "surface", + "smoothness", + "crossing", + 24, + ["element_type", "id"], + ), + "title": "Surface x Smoothness", + }, + "other_footways_yr_moth_update": { + "function": create_barchart, + "params": ( + updating_dicts["other_footways"], + "year_month", + "Year and Month Of Update (other_footways)", + " type", + 24, + "count", + "-x", + ), + "title": "Year and Month Of Update", + }, + "other_footways_number_revisions": { + "function": create_barchart, + "params": ( + updating_dicts["other_footways"], + "n_revs", + "Year and Month Of Update (other_footways)", + " type", + 24, + "count", + "-x", + ), + "title": "Number Of Revisions", + }, + }, } global_insertions = { diff --git a/functions.py b/functions.py index cc83882..213d0bd 100644 --- a/functions.py +++ b/functions.py @@ -674,7 +674,7 @@ def create_length_field(input_gdf, fieldname="length(km)", in_km=True): factor = 1000 utm_crs = input_gdf.estimate_utm_crs() - input_gdf["length(km)"] = input_gdf.to_crs(utm_crs).length / factor + input_gdf[fieldname] = input_gdf.to_crs(utm_crs).length / factor def create_weblink_field( @@ -845,14 +845,14 @@ def get_gdfs_dict(raw_data=False): } -def get_gdfs_dict_v2(category="data"): +def get_gdfs_dict_v2(): """ - available categories: 'data', 'data_raw','other_footways_subcategories', 'map_layers' + shall include also the specialized categories """ return { - category: gpd.read_parquet(paths_dict["data"][category]) - for category in paths_dict["data"] + category: gpd.read_parquet(paths_dict["map_layers"][category]) + for category in paths_dict["map_layers"] } diff --git a/getting_feature_versioning_data.py b/getting_feature_versioning_data.py index ab1afa9..0211848 100644 --- a/getting_feature_versioning_data.py +++ b/getting_feature_versioning_data.py @@ -66,7 +66,9 @@ as_df = pd.DataFrame(data) - as_df.to_json(f"data/{category}_versioning.json") + # 
as_df.to_json(f"data/{category}_versioning.json") + + as_df.to_json(paths_dict["versioning"][category]) # to record data aging: record_datetime("Versioning Data") diff --git a/todo b/todo index 2298339..0788508 100644 --- a/todo +++ b/todo @@ -1,6 +1,6 @@ [ ] Create the new modules [ ] The Data Watcher - [ ] The Data Acquisition Hub + [ ] The Data Acquisition Hub (DAH) [ ] Improvements on data Quality [ ] Add the geometric filters [ ] External Providers, such as OSMCHA @@ -16,6 +16,10 @@ and routing streamlit demo [ ] On each module, for all layers in a dataset [ ] Global Layers of each category and also the ones for each sub category [ ] change data updating to become the "data" module +[ ] Create a system to stop updates, either when the stop is triggered manually or when there are no new updates (linked to DAH) + +LESS IMPORTANT: + [ ] Styling stuff: [ ] put a faded color on indoor stuff [ ] Elevation stuff: