Merge branch 'olgalidev' into master
olgaliak authored Oct 20, 2018
2 parents 9b65da1 + aa83c9c commit 1d35180
Showing 5 changed files with 64 additions and 20 deletions.
4 changes: 3 additions & 1 deletion README.md
@@ -58,7 +58,9 @@ This will include cloning of https://github.com/tensorflow/models. (On my machine
_"python_file_directory"_ config value should point to the _"train"_ scripts from this project.
Example:
`python_file_directory=/home/olgali/repos/models/research/active-learning-detect/train`
3) pip install azure-blob packages: azure.storage.blob
4) pip install azure.storage.blob
pip install opencv-python
pip install pandas

### Tagger machine(s) (could be same as Linux box or separate boxes\vms)
1) Have Python 3.6 up and running.
1 change: 1 addition & 0 deletions config.ini
@@ -6,6 +6,7 @@ label_container_name=activelearninglabels
# IMAGE INFORMATION
user_folders=True
classes=knots,date
ideal_class_balance=0.7,0.3
filetype=*.png
# TAGGING MACHINE
tagging_location=C:\Users\t-yapand\Desktop\NewTag
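For reference, the new `ideal_class_balance` value is a comma-separated list of fractions that is expected to line up one-to-one with `classes` (here 70% knots, 30% date). Below is a minimal sketch of how the two config strings combine into per-class row quotas, assuming the flat `config_file` mapping the scripts use; the `quotas` dict is illustrative and not part of the repo:

```python
# Minimal sketch (illustrative, not from the repo): derive per-class quotas
# from the two comma-separated config strings.
classes = "knots,date".split(",")                    # config_file["classes"]
ideal_class_balance = "0.7,0.3".split(",")           # config_file["ideal_class_balance"]

num_rows = 20  # how many images to hand to the tagger this round
quotas = {cls: int(num_rows * float(frac))
          for cls, frac in zip(classes, ideal_class_balance)}
print(quotas)  # {'knots': 14, 'date': 6}
```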
59 changes: 49 additions & 10 deletions tag/download_vott_json.py
@@ -29,6 +29,7 @@ def make_vott_output(all_predictions, output_location, user_folders, image_loc,
output_location = Path(output_location)/folder_name
else:
output_location = Path(output_location)/"Images"

output_location.mkdir(parents=True, exist_ok=True)
using_blob_storage = blob_credentials is not None
if using_blob_storage:
@@ -108,20 +109,51 @@ def make_vott_output(all_predictions, output_location, user_folders, image_loc,
with open(str(output_location)+".json","w") as json_out:
json.dump(dirjson, json_out)

def get_top_rows(file_location, num_rows, user_folders, pick_max):
def get_top_rows(file_location, num_rows, user_folders, pick_max, tag_names, ideal_class_balance):
if ideal_class_balance is not None and len(tag_names) != len(ideal_class_balance):
print("WARNING: Mismatch between number of classes:{} and ideal_class_balance: {}".format(tag_names,
ideal_class_balance))
print("Defaulting to ideal_class_balance: None")
ideal_class_balance = None

with (file_location/"totag.csv").open(mode='r') as file:
reader = csv.reader(file)
header = next(reader)
csv_list = list(reader)
all_files_per_class = {}
if user_folders:
all_files = defaultdict(lambda: defaultdict(list))
for row in csv_list:
all_files[row[FOLDER_LOCATION]][row[0]].append(row)
all_lists = []
class_balances_cnt = 1
if ideal_class_balance is not None:
class_balances_cnt = len(ideal_class_balance)
if pick_max:
for folder_name in all_files:
all_lists.append(nlargest(num_rows, all_files[folder_name].values(), key=lambda x:float(x[0][CONFIDENCE_LOCATION])))
top_rows = max(all_lists,key=lambda x:sum(float(row[0][CONFIDENCE_LOCATION]) for row in x))
for k, v in all_files[folder_name].items():
v_arr = np.array(v)
classes = v_arr[:, TAG_LOCATION]
for i in range(class_balances_cnt):
class_i = tag_names[i]
if class_i in classes:
if class_i not in all_files_per_class:
#all_files_per_class[class_i] = {}
all_files_per_class[class_i] = []
#all_files_per_class[class_i][k] = v
all_files_per_class[class_i].append(v)
break;


for i in range(class_balances_cnt):
num_rows_i = int(num_rows * float(ideal_class_balance[i]))
class_i = tag_names[i]
top = nlargest(num_rows_i, all_files_per_class[class_i],
key=lambda x: float(x[0][CONFIDENCE_LOCATION]))
all_lists = all_lists + top
#all_lists.append(nlargest(num_rows, all_files[folder_name].values(), key=lambda x:float(x[0][CONFIDENCE_LOCATION])))
#top_rows = max(all_lists,key=lambda x:sum(float(row[0][CONFIDENCE_LOCATION]) for row in x))
top_rows = all_lists
else:
for folder_name in all_files:
all_lists.append(nsmallest(num_rows, all_files[folder_name].values(), key=lambda x:float(x[0][CONFIDENCE_LOCATION])))
@@ -145,13 +177,16 @@ def get_top_rows(file_location, num_rows, user_folders, pick_max):
(tagging_writer if row[0] in tagging_files else untagged_writer).writerow(row)
return top_rows

def create_vott_json(file_location, num_rows, user_folders, pick_max, image_loc, output_location, blob_credentials=None, tag_names = ["stamp"], max_tags_per_pixel=None):
all_files = get_top_rows(file_location, num_rows, user_folders, pick_max)
def create_vott_json(file_location, num_rows, user_folders, pick_max, image_loc, output_location, blob_credentials=None,
tag_names = ["stamp"], max_tags_per_pixel=None, ideal_class_balance=None):
all_files = get_top_rows(file_location, num_rows, user_folders, pick_max, tag_names, ideal_class_balance)
# The tag_colors list generates random colors for each tag. To ensure that these colors stand out / are easy to see on a picture, the colors are generated
# in the hls format, with the random numbers biased towards a high luminosity (>=.8) and saturation (>=.75).
make_vott_output(all_files, output_location, user_folders, image_loc, blob_credentials=blob_credentials, tag_names=tag_names,
tag_colors=['#%02x%02x%02x' % (int(256*r), int(256*g), int(256*b)) for
r,g,b in [colorsys.hls_to_rgb(random.random(),0.8 + random.random()/5.0, 0.75 + random.random()/4.0) for _ in tag_names]], max_tags_per_pixel=max_tags_per_pixel)
colors = ['#%02x%02x%02x' % (int(256*r), int(256*g), int(256*b)) for
r,g,b in [colorsys.hls_to_rgb(random.random(),0.8 + random.random()/5.0, 0.75 + random.random()/4.0) for _ in tag_names]]

make_vott_output(all_files, output_location, user_folders, image_loc, blob_credentials=blob_credentials,
tag_names=tag_names, tag_colors=colors, max_tags_per_pixel=max_tags_per_pixel)

if __name__ == "__main__":
#create_vott_json(r"C:\Users\t-yapand\Desktop\GAUCC1_1533070087147.csv",20, True, r"C:\Users\t-yapand\Desktop\GAUCC", r"C:\Users\t-yapand\Desktop\Output\GAUCC")
@@ -177,10 +212,14 @@ def create_vott_json(file_location, num_rows, user_folders, pick_max, image_loc,
block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], str(csv_file_loc/"totag.csv"))
container_name = config_file["image_container_name"]
file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(container_name) if re.match(r'tagging_(.*).csv', blob.name)]
ideal_class_balance = config_file["ideal_class_balance"].split(",")
if file_date:
block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], str(csv_file_loc/"tagging.csv"))
create_vott_json(csv_file_loc, int(sys.argv[1]), config_file["user_folders"]=="True", config_file["pick_max"]=="True", "", config_file["tagging_location"],
blob_credentials=(block_blob_service, container_name), tag_names=config_file["classes"].split(","), max_tags_per_pixel=config_file.get("max_tags_per_pixel",None))
create_vott_json(csv_file_loc, int(sys.argv[1]), config_file["user_folders"]=="True", config_file["pick_max"]=="True", "",
config_file["tagging_location"], blob_credentials=(block_blob_service, container_name),
tag_names=config_file["classes"].split(","),
max_tags_per_pixel=config_file.get("max_tags_per_pixel"),
ideal_class_balance =config_file.get("ideal_class_balance").split(","))
container_name = config_file["label_container_name"]
block_blob_service.create_blob_from_path(container_name, "{}_{}.{}".format("tagging",int(time.time() * 1000),"csv"), str(csv_file_loc/"tagging.csv"))
block_blob_service.create_blob_from_path(container_name, "{}_{}.{}".format("totag",int(time.time() * 1000),"csv"), str(csv_file_loc/"totag.csv"))
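
In short, the new code path in `get_top_rows` groups candidate images by the class of their prediction and lets each class contribute about `num_rows * ideal_class_balance[i]` of the highest-confidence rows. The sketch below is a simplified illustration of that idea, assuming plain `(filename, class, confidence)` tuples rather than the CSV rows and per-folder grouping the real function works with:

```python
from heapq import nlargest

def pick_balanced(rows, num_rows, tag_names, ideal_class_balance):
    """Simplified stand-in for the balanced selection in get_top_rows."""
    # Group candidate rows by their predicted class.
    per_class = {name: [] for name in tag_names}
    for row in rows:
        if row[1] in per_class:
            per_class[row[1]].append(row)
    # Give each class a share of num_rows and take its most confident rows.
    picked = []
    for name, frac in zip(tag_names, ideal_class_balance):
        quota = int(num_rows * float(frac))
        picked += nlargest(quota, per_class[name], key=lambda r: r[2])
    return picked

rows = [("a.png", "knots", 0.91), ("b.png", "date", 0.88),
        ("c.png", "knots", 0.40), ("d.png", "date", 0.35),
        ("e.png", "knots", 0.75)]
print(pick_balanced(rows, 4, ["knots", "date"], ["0.7", "0.3"]))
# -> two "knots" rows and one "date" row, mirroring the 0.7/0.3 split
```

Note that the commit applies this balancing only on the `pick_max` branch; when `pick_max` is False, the diff keeps the original per-folder `nsmallest` selection.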
8 changes: 4 additions & 4 deletions tag/upload_vott_json.py
@@ -95,11 +95,11 @@ def select_jsons(image_directory, user_folders, file_location):
csv_file_loc = Path(config_file["tagging_location"])
file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(container_name) if re.match(r'tagged_(.*).csv', blob.name)]
if file_date:
block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], csv_file_loc/"tagged.csv")
block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], str(csv_file_loc/"tagged.csv"))
file_date = [(blob.name, blob.properties.last_modified) for blob in block_blob_service.list_blobs(container_name) if re.match(r'tagging_(.*).csv', blob.name)]
if file_date:
block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], csv_file_loc/"tagging.csv")
block_blob_service.get_blob_to_path(container_name, max(file_date, key=lambda x:x[1])[0], str(csv_file_loc/"tagging.csv"))
#TODO: Ensure this parses folder recursively when given tagging location. Remove the .json part
select_jsons(config_file["tagging_location"],config_file["user_folders"]=="True",csv_file_loc)
block_blob_service.create_blob_from_path(container_name, "{}_{}.{}".format("tagged",int(time.time() * 1000),"csv"), csv_file_loc/"tagged.csv")
block_blob_service.create_blob_from_path(container_name, "{}_{}.{}".format("tagging",int(time.time() * 1000),"csv"), csv_file_loc/"tagging.csv")
block_blob_service.create_blob_from_path(container_name, "{}_{}.{}".format("tagged",int(time.time() * 1000),"csv"), str(csv_file_loc/"tagged.csv"))
block_blob_service.create_blob_from_path(container_name, "{}_{}.{}".format("tagging",int(time.time() * 1000),"csv"), str(csv_file_loc/"tagging.csv"))
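
The upload_vott_json.py change is purely mechanical: the `Path` objects are now wrapped in `str()` before being passed to `get_blob_to_path` / `create_blob_from_path`, matching how download_vott_json.py already calls the `azure.storage.blob` `BlockBlobService` API. A small sketch of the pattern (the account credentials, container name, and local folder are placeholders):

```python
from pathlib import Path
from azure.storage.blob import BlockBlobService  # azure-storage-blob API used by this repo

csv_file_loc = Path("/tmp/tagging")  # placeholder local folder
block_blob_service = BlockBlobService(account_name="myaccount", account_key="...")  # placeholder credentials

# Pass plain string paths to the SDK, as the commit now does.
block_blob_service.get_blob_to_path("labels", "tagged_1540000000000.csv",
                                    str(csv_file_loc / "tagged.csv"))
```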
12 changes: 7 additions & 5 deletions test/test_download_vott_json.py
@@ -27,8 +27,9 @@ def setUp(self):
shutil.copyfile("./totag_source.csv", str(self.csv_file_loc / "totag.csv"))

self.csv_file_loc.mkdir(parents=True, exist_ok=True)
#self.ideal_class_balance = self.config_file["ideal_class_balance"].split(",")
self.max_tags_per_pixel = self.config_file.get("max_tags_per_pixel")

self.ideal_class_balance = self.config_file["ideal_class_balance"].split(",")
self. max_tags_per_pixel = self.config_file.get("max_tags_per_pixel")
self.tag_names = self.config_file["classes"].split(",")
self.user_folders = self.config_file["user_folders"] == "True"
self.pick_max = self.config_file["pick_max"] == "True"
@@ -41,18 +42,19 @@ def test_get_top_rows(self):
N_ROWS = 3
N_FILES = 3
all_files = get_top_rows(self.csv_file_loc, N_ROWS, self.user_folders ,
self.pick_max)
self.pick_max, self.tag_names, self.ideal_class_balance)
self.assertEqual(len(all_files), N_FILES)

def test_create_vott_json(self):
N_ROWS = 3
N_FILES = 3

FOLDER_NAME = "board_images_png"
create_vott_json(self.csv_file_loc, N_ROWS, self.user_folders ,
self.pick_max, "",
self.tagging_location, blob_credentials=None,
tag_names= self.tag_names,
max_tags_per_pixel=self. max_tags_per_pixel)
max_tags_per_pixel=self. max_tags_per_pixel ,
ideal_class_balance=self.ideal_class_balance)

res_folder = os.path.join(self.tagging_location, FOLDER_NAME)
res_immages_cnt = sum([len(files) for r, d, files in os.walk(res_folder)])
