Skip to content

Commit

Permalink
update handling of missing dict keys
Browse files Browse the repository at this point in the history
  • Loading branch information
iulusoy committed Jun 13, 2024
1 parent be918b5 commit d8d740f
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 57 deletions.
1 change: 1 addition & 0 deletions ammico/faces.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ def clean_subdict(self, result: dict) -> dict:
if confidence_value > self.emotion_threshold and not wears_mask
else None
)
print("emotion confidence", confidence_value, outcome)
# also set the emotion category
if outcome:
self.subdict["emotion (category)"].append(
Expand Down
9 changes: 7 additions & 2 deletions ammico/notebooks/DemoNotebook_ammico.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@
" # install setuptools\n",
" # %pip install setuptools==61 -qqq\n",
" # uninstall some pre-installed packages due to incompatibility\n",
" %pip uninstall --yes tensorflow-probability dopamine-rl lida pandas-gbq torchaudio torchdata torchtext orbax-checkpoint flex-y -qqq\n",
" %pip uninstall --yes tensorflow-probability dopamine-rl lida pandas-gbq torchaudio torchdata torchtext orbax-checkpoint flex-y jax jaxlib -qqq\n",
" # install ammico\n",
" %pip install git+https://github.com/ssciwr/ammico.git -qqq\n",
" # install older version of jax to support transformers use of diffusers\n",
" # mount google drive for data and API key\n",
" from google.colab import drive\n",
"\n",
Expand Down Expand Up @@ -95,6 +96,9 @@
"outputs": [],
"source": [
"import os\n",
"# jax also sometimes leads to problems on google colab\n",
"# if this is the case, try restarting the kernel and executing this \n",
"# and the above two code cells again\n",
"import ammico\n",
"# for displaying a progress bar\n",
"from tqdm import tqdm"
Expand Down Expand Up @@ -254,8 +258,9 @@
"outputs": [],
"source": [
"for num, key in tqdm(enumerate(image_dict.keys()), total=len(image_dict)): # loop through all images\n",
" print(image_dict[key])\n",
" image_dict[key] = ammico.EmotionDetector(image_dict[key]).analyse_image() # analyse image with EmotionDetector and update dict\n",
" \n",
" print(image_dict[key])\n",
" if num % dump_every == 0 or num == len(image_dict) - 1: # save results every dump_every to dump_file\n",
" image_df = ammico.get_dataframe(image_dict)\n",
" image_df.to_csv(dump_file)"
Expand Down
4 changes: 2 additions & 2 deletions ammico/test/data/example_faces.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
"wears_mask": ["No", "No"],
"gender": ["Man", "Man"],
"race": ["asian", "white"],
"emotion": [null, "angry"],
"emotion (category)": [null, "Negative"]
"emotion": [null, null],
"emotion (category)": [null, null]
},
"pexels-maksgelatin-4750169":
{
Expand Down
2 changes: 1 addition & 1 deletion ammico/test/test_faces.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def test_analyse_faces(get_path, monkeypatch):
for key in mydict.keys():
mydict[key].update(
fc.EmotionDetector(
mydict[key], emotion_threshold=60, accept_disclosure="OTHER_VAR"
mydict[key], emotion_threshold=80, accept_disclosure="OTHER_VAR"
).analyse_image()
)

Expand Down
17 changes: 3 additions & 14 deletions ammico/test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,30 +90,19 @@ def test_check_for_missing_keys():
"file2": {"faces": "No", "text_english": "Otherthing"},
}
# check that dict is not changed
mydict2 = ut.check_for_missing_keys(mydict)
mydict2 = ut._check_for_missing_keys(mydict)
assert mydict2 == mydict
# check that dict is updated if key is missing
mydict = {
"file1": {"faces": "Yes", "text_english": "Something"},
"file2": {"faces": "No"},
}
mydict2 = ut.check_for_missing_keys(mydict)
mydict2 = ut._check_for_missing_keys(mydict)
assert mydict2["file2"] == {"faces": "No", "text_english": None}
# check that dict is updated if more than one key is missing
mydict = {"file1": {"faces": "Yes", "text_english": "Something"}, "file2": {}}
mydict2 = ut.check_for_missing_keys(mydict)
mydict2 = ut._check_for_missing_keys(mydict)
assert mydict2["file2"] == {"faces": None, "text_english": None}
# now test the exceptions
with pytest.raises(ValueError):
ut.check_for_missing_keys({"File": "path"})
with pytest.raises(ValueError):
ut.check_for_missing_keys({"File": {}})
mydict = {
"file1": {"faces": "Yes"},
"file2": {"faces": "No", "text_english": "Something"},
}
with pytest.raises(ValueError):
ut.check_for_missing_keys(mydict)


def test_append_data_to_dict(get_path):
Expand Down
55 changes: 17 additions & 38 deletions ammico/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,52 +156,31 @@ def initialize_dict(filelist: list) -> dict:
return mydict


def check_for_missing_keys(mydict: dict) -> dict:
def _check_for_missing_keys(mydict: dict) -> dict:
"""Check the nested dictionary for any missing keys in the subdicts.
Args:
mydict(dict): The nested dictionary with keys to check.
Returns:
dict: The dictionary with keys appended."""
# check that we actually got a nested dict
if not isinstance(mydict[next(iter(mydict))], dict):
raise ValueError(
"Please provide a nested dictionary - you provided {}".format(
next(iter(mydict))
# also get all keys for all items
# currently we go through the whole dictionary twice
# however, compared to the rest of the code this is negligible
keylist = []
for key in mydict.keys():
if not isinstance(mydict[key], dict):
raise ValueError(
"Please provide a nested dictionary - you provided {}".format(key)
)
)
# gather all existing keys of first item in a list
subdict = mydict[next(iter(mydict))]
if len(list(subdict.keys())) < 1:
raise ValueError(
"Could not get any keys to compare to - please check if your nested dict is empty!"
)
keylist.append(list(mydict[key].keys()))
# find the longest list of keys
max_keys = max(keylist, key=len)
# now generate missing keys
for key in mydict.keys():
# compare keys of next item with first item
if subdict.keys() != mydict[key].keys():
# print a warning if key is not found and set to None
keys_a = set(subdict.keys())
keys_b = set(mydict[key].keys())
missing_keys_in_b = keys_a - keys_b
if missing_keys_in_b:
print(
"Found missing key(s) {} in subdict {} - setting to None.".format(
missing_keys_in_b, key
)
)
for missing_key in missing_keys_in_b:
mydict[key][missing_key] = None
# check that there are no other keys in the subdicts -
# this would only happen if there is a key missing in the first subdict
# then we would need to start over so best to
# abort if this happens - this is a very unlikely case
missing_keys_in_a = keys_b - keys_a
if missing_keys_in_a:
raise ValueError(
"Could not update missing keys - first item already missing {}".format(
missing_keys_in_a
)
)
for mkey in max_keys:
if mkey not in mydict[key].keys():
mydict[key][mkey] = None
return mydict


Expand All @@ -223,7 +202,7 @@ def dump_df(mydict: dict) -> DataFrame:


def get_dataframe(mydict: dict) -> DataFrame:
    """Build a DataFrame from the nested dictionary.

    Args:
        mydict(dict): The nested dictionary; missing keys in its subdicts
            are filled with None in place before the conversion.
    Returns:
        DataFrame: The result of passing the dictionary through
            append_data_to_dict and dump_df.
    """
    # make sure all subdicts carry the same keys before they are merged
    _check_for_missing_keys(mydict)
    outdict = append_data_to_dict(mydict)
    return dump_df(outdict)

Expand Down

0 comments on commit d8d740f

Please sign in to comment.