Commit 41ab623

first revamping on charts, lots of TODOs

kauevestena committed Sep 27, 2024
1 parent 329a199 commit 41ab623
Showing 7 changed files with 172 additions and 68 deletions.
8 changes: 8 additions & 0 deletions constants.py
@@ -152,6 +152,7 @@
paths_dict["other_footways_subcategories"][subcategory] = subcategory_path
paths_dict["map_layers"][subcategory] = subcategory_path

versioning_dict = paths_dict["versioning"]

# max radius to cut off unconnected crossings and kerbs
max_radius_cutoff = 50
@@ -583,3 +584,10 @@
# defined here to avoid circular importing problems
def get_url(relative_url, base_url=node_homepage_url):
return os.path.join(base_url, relative_url)


# to fill in default values for dates:
default_missing_day = 9
default_missing_month = 8
default_missing_year = 2004
# OSM's foundation date :-)
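
A side note on the get_url helper defined in this same hunk: os.path.join uses the host OS path separator, so on Windows it would splice a backslash into the URL; urllib.parse.urljoin is the separator-safe alternative. A minimal sketch (the base URL value here is hypothetical):

import os
from urllib.parse import urljoin

node_homepage_url = "https://example.org/opensidewalkmap/"  # hypothetical value

def get_url(relative_url, base_url=node_homepage_url):
    # Works on POSIX, but os.path.join would splice in "\\" on Windows:
    return os.path.join(base_url, relative_url)

def get_url_portable(relative_url, base_url=node_homepage_url):
    # urljoin always uses "/" and resolves the relative path against the base:
    return urljoin(base_url, relative_url)

print(get_url_portable("statistics/sidewalks.html"))
# https://example.org/opensidewalkmap/statistics/sidewalks.html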
17 changes: 13 additions & 4 deletions dashboard/statistics_funcs.py
@@ -23,17 +23,17 @@ def get_count_df(input_df, fieldname, str_to_append=" type"):
input_df[fieldname]
.value_counts()
.reset_index()
.rename(columns={"index": outfieldname, fieldname: "count"})
.sort_values(by="count", ascending=False),
outfieldname,
# .rename(columns={"index": outfieldname, fieldname: "count"})
)


def create_barchart(
input_df,
fieldname,
title,
str_to_append=" type",
str_to_append="",
title_fontsize=24,
tooltip="count",
x_sort="-y",
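
The commented-out .rename in get_count_df above reflects a pandas behavior change: since pandas 2.0, Series.value_counts().reset_index() already yields the original column name plus a "count" column, so the rename that pandas 1.x required is redundant. A quick sketch of the difference:

import pandas as pd

s = pd.Series(["asphalt", "asphalt", "paving_stones"], name="surface")

counts = s.value_counts().reset_index()
print(counts.columns.tolist())
# pandas >= 2.0: ['surface', 'count']
# pandas 1.x:    ['index', 'surface']  (hence the old rename)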
@@ -161,7 +161,7 @@ def double_scatter_bar(
width=600,
height=350,
)
.add_selection(interval)
.add_params(interval)
)

hist_base = (
@@ -189,4 +189,13 @@
return (scatter & hist).configure_title(fontSize=fontsize, align="center")


# 'Surface x Smoothness'
def create_rev_date(row):
try:
return datetime(
year=int(row["rev_year"]),
month=int(row["rev_month"]),
day=int(row["rev_day"]),
)
except ValueError:
# Handle invalid dates: return None (or substitute a specific default date)
return None
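
The .add_selection(interval) to .add_params(interval) change in double_scatter_bar tracks Altair's API: add_selection was deprecated in Altair 5 in favor of add_params. A minimal sketch of the pattern with toy data:

import altair as alt
import pandas as pd

df = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})

interval = alt.selection_interval()  # brush selection shared by linked charts

scatter = (
    alt.Chart(df)
    .mark_point()
    .encode(x="x", y="y")
    .add_params(interval)  # Altair >= 5; pre-5 releases used .add_selection
)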
82 changes: 55 additions & 27 deletions dashboard/statistics_generation.py
@@ -2,43 +2,67 @@

# reading data:

exit(0)

for category in paths_dict["data"]:
if category != "other_footways": # TODO: remove this to include this category also
print("Adaptations for:", category)
for category in gdfs_dict:
# creating the folder if it does not exist
create_folder_if_not_exists(os.path.join("statistics", category))

# creating additional fields
# creating a ref to improve readability
cat_gdf = gdfs_dict[category]
update_df = updating_dicts[category]

if geom_type_dict[category] == "LineString":
create_length_field(gdfs_dict[category])
create_weblink_field(gdfs_dict[category])
elif geom_type_dict[category] == "Point":
create_weblink_field(gdfs_dict[category], "Point")
print("Adaptations for:", category)

if "survey:date" in gdfs_dict[category].columns:
# creating additional fields

gdfs_dict[category]["Year of Survey"] = gdfs_dict[category][
"survey:date"
].apply(get_year_surveydate)
if "LineString" in geom_type_dict[category]:
create_length_field(cat_gdf)
create_weblink_field(cat_gdf)
elif "Point" in geom_type_dict[category]:
create_weblink_field(cat_gdf, "Point")

create_folder_if_not_exists(os.path.join("statistics", category))
# uncertain about polygon cases
# elif (:
# geom_type_dict[category] == "Polygon"
# or geom_type_dict[category] == "MultiPolygon"
# ):
# create_weblink_field(gdfs_dict[category])

# updating info:
updating_dicts[category]["month_year"] = (
updating_dicts[category]["rev_month"].map("{:02d}".format)
+ "_"
+ updating_dicts[category]["rev_year"].astype(str)
)
if "survey:date" in cat_gdf.columns:

updating_dicts[category]["year_month"] = (
updating_dicts[category]["rev_year"].astype(str)
+ "_"
+ updating_dicts[category]["rev_month"].map("{:02d}".format)
)
cat_gdf["Year of Survey"] = cat_gdf["survey:date"].apply(get_year_surveydate)

updating_dicts[category].sort_values("year_month", inplace=True)
# updating info:
update_df["month_year"] = (
update_df["rev_month"].map("{:02d}".format)
+ "_"
+ update_df["rev_year"].astype(str)
)

update_df["year_month"] = (
update_df["rev_year"].astype(str)
+ "_"
+ update_df["rev_month"].map("{:02d}".format)
)

update_df.sort_values("year_month", inplace=True)

# Fill missing values with a default (e.g., 1 for month or day) TODO: move to data adaptation script
update_df["rev_year"] = (
update_df["rev_year"].fillna(default_missing_year).astype(int)
)
update_df["rev_month"] = (
update_df["rev_month"].fillna(default_missing_month).astype(int)
)
update_df["rev_day"] = update_df["rev_day"].fillna(default_missing_day).astype(int)

update_df["rev_date_obj"] = update_df.apply(create_rev_date, axis=1)

update_df["age_years"] = (
pd.Timestamp(datetime.today()) - update_df["rev_date_obj"]
).dt.days / 365.25

# storing chart infos:
generated_list_dict = {}
charts_titles = {}

@@ -79,6 +103,10 @@
print(generated_list_dict)

for category in generated_list_dict:
if not generated_list_dict[category]:
print("no charts generated for: ", category)
continue

category_homepage = get_url(generated_list_dict[category][0])

topbar += f'<a href="{category_homepage}">{category.capitalize()} Charts</a>\n'
113 changes: 83 additions & 30 deletions dashboard/statistics_specs.py
@@ -1,14 +1,13 @@
from statistics_funcs import *

gdfs_dict = {}
gdfs_dict = get_gdfs_dict()
updating_dicts = {}


for category in paths_dict["data"]:
gdfs_dict[category] = gpd.read_parquet(paths_dict["data"][category])

if os.path.exists(paths_dict["versioning"].get(category)):
updating_dicts[category] = pd.read_json(paths_dict["versioning"][category])
updating_dicts[category] = pd.read_json(versioning_dict[category])
else:
updating_dicts[category] = pd.DataFrame()

@@ -87,15 +86,15 @@
),
"title": "Incline Values",
},
"sidewalks_survey_year": {
"function": create_barchart,
"params": (
gdfs_dict["sidewalks"],
"Year of Survey",
"Year of Survey Image (sidewalks)",
),
"title": "Year of Survey Image",
},
# "sidewalks_survey_year": {
# "function": create_barchart,
# "params": (
# gdfs_dict["sidewalks"],
# "Year of Survey",
# "Year of Survey Image (sidewalks)",
# ),
# "title": "Year of Survey Image",
# },
"sidewalks_yr_moth_update": {
"function": create_barchart,
"params": (
@@ -159,15 +158,15 @@
),
"title": "Surface x Smoothness",
},
"crossings_survey_year": {
"function": create_barchart,
"params": (
gdfs_dict["crossings"],
"Year of Survey",
"Year of Survey Image (crossings)",
),
"title": "Year of Survey Image",
},
# "crossings_survey_year": {
# "function": create_barchart,
# "params": (
# gdfs_dict["crossings"],
# "Year of Survey",
# "Year of Survey Image (crossings)",
# ),
# "title": "Year of Survey Image",
# },
"crossings_yr_moth_update": {
"function": create_barchart,
"params": (
@@ -240,15 +239,15 @@
),
"title": "Wheelchair Acessibility",
},
"kerbs_survey_year": {
"function": create_barchart,
"params": (
gdfs_dict["kerbs"],
"Year of Survey",
"Year of Survey Image (kerbs)",
),
"title": "Year of Survey Image",
},
# "kerbs_survey_year": {
# "function": create_barchart,
# "params": (
# gdfs_dict["kerbs"],
# "Year of Survey",
# "Year of Survey Image (kerbs)",
# ),
# "title": "Year of Survey Image",
# },
"kerbs_yr_moth_update": {
"function": create_barchart,
"params": (
@@ -276,6 +275,60 @@
"title": "Number Of Revisions",
},
},
"other_footways": {
"other_footways_surface": {
"function": create_barchart,
"params": (
updating_dicts["other_footways"],
"surface",
"other_footways Surface",
),
"title": "Other Footways Surface",
},
"other_footways_smoothness_x_surface": {
"function": double_scatter_bar,
"params": (
gdfs_dict["other_footways"],
"Surface x Smoothness (other_footways)",
"surface",
"smoothness",
None,
"count()",
"surface",
"smoothness",
"crossing",
24,
["element_type", "id"],
),
"title": "Surface x Smoothness",
},
"other_footways_yr_moth_update": {
"function": create_barchart,
"params": (
updating_dicts["other_footways"],
"year_month",
"Year and Month Of Update (other_footways)",
" type",
24,
"count",
"-x",
),
"title": "Year and Month Of Update",
},
"other_footways_number_revisions": {
"function": create_barchart,
"params": (
updating_dicts["other_footways"],
"n_revs",
"Year and Month Of Update (other_footways)",
" type",
24,
"count",
"-x",
),
"title": "Number Of Revisions",
},
},
}
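
For orientation, these per-category specs are data-driven chart definitions: each entry names a function and its positional params. A plausible consumption pattern is sketched below (an assumption — charts_dict is a hypothetical name for the mapping above, and the real loop lives in statistics_generation.py, outside the hunks shown here):

import os

for category, charts in charts_dict.items():
    os.makedirs(os.path.join("statistics", category), exist_ok=True)
    for chart_name, spec in charts.items():
        # e.g. create_barchart(gdf, fieldname, title, ...) or double_scatter_bar(...)
        chart = spec["function"](*spec["params"])
        chart.save(os.path.join("statistics", category, f"{chart_name}.html"))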

global_insertions = {
10 changes: 5 additions & 5 deletions functions.py
@@ -674,7 +674,7 @@ def create_length_field(input_gdf, fieldname="length(km)", in_km=True):
factor = 1000

utm_crs = input_gdf.estimate_utm_crs()
input_gdf["length(km)"] = input_gdf.to_crs(utm_crs).length / factor
input_gdf[fieldname] = input_gdf.to_crs(utm_crs).length / factor


def create_weblink_field(
@@ -845,14 +845,14 @@ def get_gdfs_dict(raw_data=False):
}


def get_gdfs_dict_v2(category="data"):
def get_gdfs_dict_v2():
"""
available categories: 'data', 'data_raw','other_footways_subcategories', 'map_layers'
shall include also the specialized categories
"""

return {
category: gpd.read_parquet(paths_dict["data"][category])
for category in paths_dict["data"]
category: gpd.read_parquet(paths_dict["map_layers"][category])
for category in paths_dict["map_layers"]
}
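
The create_length_field fix above (writing to fieldname instead of the hard-coded "length(km)") sits on top of a projection detail worth spelling out: lengths must be computed in a projected CRS, and estimate_utm_crs() picks a suitable UTM zone from the layer's extent. A minimal sketch with toy data:

import geopandas as gpd
from shapely.geometry import LineString

# A toy layer in geographic coordinates (lon/lat):
gdf = gpd.GeoDataFrame(
    geometry=[LineString([(-49.27, -25.43), (-49.26, -25.43)])],
    crs="EPSG:4326",
)

# .length on EPSG:4326 would be in degrees, so reproject to a local UTM zone:
utm_crs = gdf.estimate_utm_crs()
gdf["length(km)"] = gdf.to_crs(utm_crs).length / 1000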


4 changes: 3 additions & 1 deletion getting_feature_versioning_data.py
@@ -66,7 +66,9 @@

as_df = pd.DataFrame(data)

as_df.to_json(f"data/{category}_versioning.json")
# as_df.to_json(f"data/{category}_versioning.json")

as_df.to_json(paths_dict["versioning"][category])

# to record data aging:
record_datetime("Versioning Data")
6 changes: 5 additions & 1 deletion todo
@@ -1,6 +1,6 @@
[ ] Create the new modules
[ ] The Data Watcher
[ ] The Data Acquisition Hub
[ ] The Data Acquisition Hub (DAH)
[ ] Improvements on data Quality
[ ] Add the geometric filters
[ ] External Providers, such as OSMCHA
@@ -16,6 +16,10 @@ and routing streamlit demo
[ ] On each module, for all layers in a dataset
[ ] Global Layers of each category and also the ones for each sub category
[ ] change data updating to become the "data" module
[ ] Create a system to stop updates, both when triggered manually and when no updates arrive (linked to DAH)

LESS IMPORTANT:

[ ] Styling stuff:
[ ] put a faded color on indoor stuff
[ ] Elevation stuff:
