Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TSK-281] UUID changes #10

Merged
merged 2 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 24 additions & 8 deletions app/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,18 @@
import numpy as np
import cv2
import tempfile
import shutil
import shutil,hashlib,re

def sanitize_filename(filename: str) -> str:
    """Helper: replace characters that are unsafe in a file name with ``_``.

    Every character that is not a word character (``\\w``: letters, digits,
    underscore), a dot, or a forward slash is replaced by a single
    underscore. Dots and slashes are kept, so extensions and path
    separators pass through unchanged.

    Args:
        filename: The raw name (or relative path) to clean up.

    Returns:
        The cleaned string; it has the same length as the input because
        each rejected character maps to exactly one underscore.
    """
    return re.sub(r'[^\w./]', '_', filename)

def get_image_hash(image) -> str:
    """Compute an MD5 hex digest of the given image's raw bytes.

    Args:
        image: Any object exposing a ``tobytes()`` method (for example a
            NumPy array); the bytes it returns are what gets hashed.

    Returns:
        The 32-character lowercase hexadecimal MD5 digest.

    Note:
        Only the output of ``tobytes()`` is hashed, so two inputs whose raw
        bytes are equal produce the same digest. MD5 is not
        collision-resistant; treat the result as a content fingerprint,
        not as a security measure.
    """
    return hashlib.md5(image.tobytes()).hexdigest()

def get_satellite_bands_config() -> str:
"""
Expand Down Expand Up @@ -82,14 +93,15 @@ def get_images(logger: Logger, job_id: int, tracer_id:str, scraped_data_reposito
if np.mean(image) != 0.0: #if image is not entirely blank

#TODO: implement tempfile
image_filename = f"{interval}.png"
image_hash = get_image_hash(image)
image_filename = f"{interval}_truecolor_{image_hash}.png"
image_path = os.path.join(image_dir, "true_color", image_filename)
os.makedirs(os.path.dirname(image_path), exist_ok=True)
save_image(image, image_path, factor=1.5/255, clip_range=(0, 1))
logger.info(f"True color Image saved to: {image_path}")


data_name = str(interval).strip("()").replace("-","_").replace(",","_").replace("\'","").replace(" ","_")
data_name = sanitize_filename(f"{interval}_truecolor_{image_hash}")
relative_path = f"sentinel/{tracer_id}/{job_id}/true_color/{data_name}.png"


Expand All @@ -115,14 +127,14 @@ def get_images(logger: Logger, job_id: int, tracer_id:str, scraped_data_reposito

last_successful_data = media_data

image_filename = f"{interval}.png"

image_filename = f"{interval}_masked_{image_hash}.png"
image_path = os.path.join(image_dir, "masked", image_filename)
os.makedirs(os.path.dirname(image_path), exist_ok=True)
save_image(image, image_path, factor=255/255, clip_range=(0, 1))
logger.info(f"Masked Image saved to: {image_path}")

data_name = str(interval).strip("()").replace("-","_").replace(",","_").replace("\'","").replace(" ","_")

data_name = sanitize_filename(f"{interval}_masked_{image_hash}")
relative_path = f"sentinel/{tracer_id}/{job_id}/masked/{data_name}.png"


Expand Down Expand Up @@ -159,6 +171,7 @@ def augment_images(logger: Logger, job_id: int, tracer_id:str, scraped_data_repo

for image_path in os.listdir(os.path.join(image_dir,"masked")):
interval = os.path.splitext(image_path)[0]
image_hash = image_path.split("_")[-1].split(".")[0]
full_path = os.path.join(image_dir, "masked", image_path)
image = cv2.imread(full_path)
# Extract image dimensions
Expand All @@ -183,7 +196,7 @@ def augment_images(logger: Logger, job_id: int, tracer_id:str, scraped_data_repo
os.makedirs(os.path.dirname(jsonpath), exist_ok=True)
df.to_json(jsonpath, orient="index")
logger.info(f"Augmented JSON saved to: {jsonpath}")
data_name = str(interval).strip("()").replace("-","_").replace(",","_").replace("\'","").replace(" ","_")
data_name = sanitize_filename(f"{interval}_augmented_{image_hash}")

relative_path = f"sentinel/{tracer_id}/{job_id}/augmented/{data_name}.json"

Expand Down Expand Up @@ -300,5 +313,8 @@ def scrape(
except Exception as error:
logger.error(f"{job_id}: Unable to scrape data. Job with tracer_id {tracer_id} failed. Error:\n{error}")
job_state = BaseJobState.FAILED
shutil.rmtree(image_dir)
try:
shutil.rmtree(image_dir)
except Exception as e:
print("could not delete .tmp dir")
#job.messages.append(f"Status: FAILED. Unable to scrape data. {e}")
2 changes: 1 addition & 1 deletion demo.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
python sentinel_scraper.py --start_date=2023-8-8 --end_date=2023-8-30 --long_left=-156.708984 --lat_up=20.759645 --long_right=-156.299744 --lat_down=20.955027 --log-level="INFO" \
--kp_auth_token test123 --kp_host localhost --kp_port 8000 --kp_scheme http \
--sentinel_client_id ab5cf2a1-cf8d-4423-89d3-b5b05659ffb5 --sentinel_client_secret 76Wvuf6q3pivLWcOypt2L1FW39dEvl9W
--sentinel_client_id dee33baf-3670-4c1a-a04e-60405c4cce75 --sentinel_client_secret EIkztfIPFH3IDBF7fWn8dFnqTt6Vjtnt