From 0767fffa45423d9521e6b40e43e2237d12bdb435 Mon Sep 17 00:00:00 2001 From: Rahul-7131 Date: Tue, 6 Aug 2024 21:38:20 +0530 Subject: [PATCH 1/2] UUID changes --- app/scraper.py | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/app/scraper.py b/app/scraper.py index 0df73de..4a32f4d 100644 --- a/app/scraper.py +++ b/app/scraper.py @@ -15,7 +15,18 @@ import numpy as np import cv2 import tempfile -import shutil +import shutil,hashlib,re + +def sanitize_filename(filename): #helper function + return re.sub(r'[^\w./]', '_', filename) + +def get_image_hash(image): + """ + Computes a hash for the given image. + """ + hasher = hashlib.md5() + hasher.update(image.tobytes()) + return hasher.hexdigest() def get_satellite_bands_config() -> str: """ @@ -82,14 +93,15 @@ def get_images(logger: Logger, job_id: int, tracer_id:str, scraped_data_reposito if np.mean(image) != 0.0: #if image is not entirely blank #TODO: implment tempfile - image_filename = f"{interval}.png" + image_hash = get_image_hash(image) + image_filename = f"{interval}_truecolor_{image_hash}.png" image_path = os.path.join(image_dir, "true_color", image_filename) os.makedirs(os.path.dirname(image_path), exist_ok=True) save_image(image, image_path, factor=1.5/255, clip_range=(0, 1)) logger.info(f"True color Image saved to: {image_path}") - data_name = str(interval).strip("()").replace("-","_").replace(",","_").replace("\'","").replace(" ","_") + data_name = sanitize_filename(f"{interval}_truecolor_{image_hash}") relative_path = f"sentinel/{tracer_id}/{job_id}/true_color/{data_name}.png" @@ -115,14 +127,14 @@ def get_images(logger: Logger, job_id: int, tracer_id:str, scraped_data_reposito last_successful_data = media_data - image_filename = f"{interval}.png" + + image_filename = f"{interval}_masked_{image_hash}.png" image_path = os.path.join(image_dir, "masked", image_filename) os.makedirs(os.path.dirname(image_path), exist_ok=True) save_image(image, image_path, factor=255/255, clip_range=(0, 1)) logger.info(f"Masked Image saved to: {image_path}") - data_name = str(interval).strip("()").replace("-","_").replace(",","_").replace("\'","").replace(" ","_") - + data_name = sanitize_filename(f"{interval}_masked_{image_hash}") relative_path = f"sentinel/{tracer_id}/{job_id}/masked/{data_name}.png" @@ -159,6 +171,7 @@ def augment_images(logger: Logger, job_id: int, tracer_id:str, scraped_data_repo for image_path in os.listdir(os.path.join(image_dir,"masked")): interval = os.path.splitext(image_path)[0] + image_hash = image_path.split("_")[-1].split(".")[0] full_path = os.path.join(image_dir, "masked", image_path) image = cv2.imread(full_path) # Extract image dimensions @@ -183,7 +196,7 @@ def augment_images(logger: Logger, job_id: int, tracer_id:str, scraped_data_repo os.makedirs(os.path.dirname(jsonpath), exist_ok=True) df.to_json(jsonpath, orient="index") logger.info(f"Augmented JSON saved to: {jsonpath}") - data_name = str(interval).strip("()").replace("-","_").replace(",","_").replace("\'","").replace(" ","_") + data_name = sanitize_filename(f"{interval}_augmented_{image_hash}") relative_path = f"sentinel/{tracer_id}/{job_id}/augmented/{data_name}.json" From 5da4a25f5631bc65066028b5628c018ca285d250 Mon Sep 17 00:00:00 2001 From: Ram Vempati Date: Wed, 7 Aug 2024 00:14:21 -0400 Subject: [PATCH 2/2] fix rmtree --- app/scraper.py | 5 ++++- demo.sh | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/app/scraper.py b/app/scraper.py index 4a32f4d..1f3e85f 100644 --- a/app/scraper.py +++ b/app/scraper.py @@ -313,5 +313,8 @@ def scrape( except Exception as error: logger.error(f"{job_id}: Unable to scrape data. Job with tracer_id {tracer_id} failed. Error:\n{error}") job_state = BaseJobState.FAILED - shutil.rmtree(image_dir) + try: + shutil.rmtree(image_dir) + except Exception as e: + print("could not delete .tmp dir") #job.messages.append(f"Status: FAILED. Unable to scrape data. {e}") \ No newline at end of file diff --git a/demo.sh b/demo.sh index 09429e2..d8c4fc6 100755 --- a/demo.sh +++ b/demo.sh @@ -1,3 +1,3 @@ python sentinel_scraper.py --start_date=2023-8-8 --end_date=2023-8-30 --long_left=-156.708984 --lat_up=20.759645 --long_right=-156.299744 --lat_down=20.955027 --log-level="INFO" \ --kp_auth_token test123 --kp_host localhost --kp_port 8000 --kp_scheme http \ - --sentinel_client_id ab5cf2a1-cf8d-4423-89d3-b5b05659ffb5 --sentinel_client_secret 76Wvuf6q3pivLWcOypt2L1FW39dEvl9W \ No newline at end of file + --sentinel_client_id dee33baf-3670-4c1a-a04e-60405c4cce75 --sentinel_client_secret EIkztfIPFH3IDBF7fWn8dFnqTt6Vjtnt \ No newline at end of file