Skip to content
This repository has been archived by the owner on Jan 9, 2025. It is now read-only.

Commit

Permalink
Add threshold filtering
Browse files Browse the repository at this point in the history
  • Loading branch information
santind committed Nov 21, 2024
1 parent 5ccf60b commit 5dde5f8
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 6 deletions.
32 changes: 32 additions & 0 deletions mapswipe/workflows/project_remap.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,18 @@
HEX_VIZ_H3_RESOLUTION = 11


# Supported threshold types. Each key is the user-facing name of a threshold
# type; each value is a callable that receives the task dataframe and returns
# the largest threshold value accepted for that type (used for validation).
THRESHOLD_TYPES = {
    "Top N Tasks": len,
    "Top N% of Tasks": lambda _df: 100,
    "Tasks With >=N remap_score": lambda _df: 1.0,
}


# Name of the column that flags which tasks were selected for remapping
SELECTION_COL = "_selected"


# Internal logic

_OFFSET_RESPONSE = 3
Expand Down Expand Up @@ -177,3 +189,23 @@ def analyze_project(project_id):
pbar.update(1)
pbar.set_description("Analysis complete")
return vars


def _validate_threshold_args(gdf, threshold_n, threshold_type):
    """Check that a threshold type is supported and its value is in range.

    Args:
        gdf: Task dataframe; passed to the per-type callable in
            ``THRESHOLD_TYPES`` to compute the upper bound.
        threshold_n: Threshold value to validate.
        threshold_type: One of the keys of ``THRESHOLD_TYPES``.

    Raises:
        ValueError: If ``threshold_type`` is not a key of ``THRESHOLD_TYPES``
            or ``threshold_n`` is outside ``[0, upper bound]``.
    """
    if threshold_type not in THRESHOLD_TYPES:
        raise ValueError(f"Unsupported threshold type '{threshold_type}'")

    # The upper bound may depend on the data (e.g. the number of rows).
    max_val = THRESHOLD_TYPES[threshold_type](gdf)
    if 0 <= threshold_n <= max_val:
        return
    raise ValueError(f"Threshold type '{threshold_type}' must be between 0 and {max_val}")


def apply_threshold_filter(gdf, threshold_n, threshold_type, selection_col):
    """Flag the highest-``remap_score`` tasks as selected.

    Args:
        gdf: Dataframe with a ``remap_score`` column. It is not modified.
        threshold_n: Threshold value; its meaning depends on
            ``threshold_type`` (a row count, a percentage of rows, or a
            minimum ``remap_score``).
        threshold_type: One of the keys of ``THRESHOLD_TYPES``.
        selection_col: Name of the column to write the selection flag into.

    Returns:
        A copy of ``gdf`` sorted by ascending ``remap_score`` with
        ``selection_col`` set to 1.0 for selected rows and 0.0 otherwise.

    Raises:
        ValueError: If the threshold arguments fail validation.
    """
    _validate_threshold_args(gdf, threshold_n, threshold_type)

    # Missing scores go first so they can never be picked as "top" tasks
    # (the default ordering would place NaN after the highest real score).
    gdf = gdf.sort_values("remap_score", na_position="first").copy()
    n_rows = len(gdf)

    if threshold_type == "Top N% of Tasks":
        # Multiply before dividing: 100 * (29 / 100.0) is 28.999..., which
        # would truncate to 28 instead of 29.
        n_selected = int(n_rows * threshold_n // 100)
    elif threshold_type == "Tasks With >=N remap_score":
        n_selected = int((gdf["remap_score"] >= threshold_n).sum())
    else:
        # "Top N Tasks": accept float input (e.g. from a numeric form field).
        n_selected = int(threshold_n)
    n_selected = max(0, min(n_selected, n_rows))

    gdf[selection_col] = 0.0
    # Guard the empty selection: a slice of ``-0:`` would cover every row.
    if n_selected > 0:
        # Single positional assignment on the frame itself; chained
        # ``gdf[col].iloc[...] = ...`` may write to a temporary copy.
        gdf.iloc[-n_selected:, gdf.columns.get_loc(selection_col)] = 1.0
    return gdf
15 changes: 11 additions & 4 deletions mapswipe/workflows/viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,13 +189,15 @@ def style_function(feature):
return m


def create_task_map(gdf_agg, color_col, value_cols, col_descs, center_pt=None):
def create_task_map(gdf_agg, color_col, value_cols, col_descs, selection_col=None, center_pt=None, color_bounds=None):
gdf = gdf_agg.copy()

geojson_data = gdf.drop('lastEdit', axis=1).to_json()

if center_pt is None:
center_pt = gdf.to_crs(gdf.estimate_utm_crs()).dissolve().centroid.to_crs(4326)
if color_bounds is None:
color_bounds = (gdf[color_col].min(), gdf[color_col].max())

map = folium.Map(tiles=_MAP_TILE_PROVIDER, location=[center_pt.y, center_pt.x], zoom_start=STARTING_ZOOM_LEVEL)
map._repr_html_ = lambda: map._parent._repr_html_(
Expand Down Expand Up @@ -225,15 +227,20 @@ def _get_desc(col_name):
max_width=800,
)

colormap = cm.linear.YlOrRd_09.scale(gdf[color_col].min(), gdf[color_col].max())
colormap = cm.linear.YlOrRd_09.scale(color_bounds[0], color_bounds[1])

def style_function(feature):
    """Return the map style dict for a single GeoJSON feature.

    The fill colour is ``colormap`` applied to the feature's ``color_col``
    property. When ``selection_col`` is set and the feature's value for it
    is falsy, the feature is drawn almost fully transparent.
    """
    properties = feature["properties"]
    is_dimmed = bool(selection_col) and not properties[selection_col]
    fill_opacity, line_opacity = (0.01, 0.01) if is_dimmed else (0.7, 0.2)
    return {
        "fillColor": colormap(properties[color_col]),
        "color": "black",
        "weight": 0.25,
        "fillOpacity": fill_opacity,
        # Leaflet names the stroke opacity option "opacity"; "lineOpacity"
        # is not a recognised path option, so it had no effect.
        "opacity": line_opacity,
    }

folium.GeoJson(
Expand Down
65 changes: 63 additions & 2 deletions notebooks/validate-workflow-local.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,69 @@
"id": "ad106b0f-d891-412d-955b-9c892192cb71",
"metadata": {},
"source": [
"# Step 3: Set Thresholds\n",
"TODO Continue here"
"# Step 3: Set Thresholds"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "be068555-1fb9-42a0-b8c5-4e380a20064f",
"metadata": {},
"outputs": [],
"source": [
"threshold_N = 5 # @param {type:\"number\"}\n",
"#threshold_type = \"Top N Tasks\" # @param [\"Top N Tasks\", \"Top N% of Tasks\", \"Tasks With >=N remap_score\"]\n",
"threshold_type = \"Top N% of Tasks\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "94018e07-997d-4337-b463-48e329a8ba57",
"metadata": {},
"outputs": [],
"source": [
"from mapswipe.workflows.project_remap import SELECTION_COL, apply_threshold_filter\n",
"gdf_threshold = apply_threshold_filter(analysis_results[\"df_agg_moran_w\"], threshold_N, threshold_type, SELECTION_COL)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b9fe1df3-c68c-424e-a80d-c69e331bd192",
"metadata": {},
"outputs": [],
"source": [
"gdf_remap = gdf_threshold[gdf_threshold[SELECTION_COL] == 1.0]\n",
"gdf_remap[[\"remap_score\"]].describe().loc[[\"count\", \"min\"], :]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "97afa9ca-ee72-48a2-8795-a7049a35bd26",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "35fad871-1938-496a-8145-22e1dd08d129",
"metadata": {},
"outputs": [],
"source": [
"gdf_threshold[SELECTION_COL].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9c6c3224-0dd9-48c5-b7d8-699b6b337467",
"metadata": {},
"outputs": [],
"source": [
"create_task_map(gdf_threshold, \"remap_score\", [\"task_id\", \"remap_score\", \"0_share_uw\", SELECTION_COL], {\"remap_score\": \"Remap Score\", \"0_share_uw\": \"Raw % 'No' Responses\", \"task_id\": \"Task ID\", SELECTION_COL: \"Selected for remapping?\"}, selection_col=SELECTION_COL, color_bounds=(0.0, 1.0))"
]
}
],
Expand Down

0 comments on commit 5dde5f8

Please sign in to comment.