Skip to content

Commit

Permalink
compare canonicalized paths and print message on skips
Browse files (browse the repository at this point in the history)
  • Loading branch information
ffalor committed Mar 14, 2024
1 parent ce37d06 commit 97acc81
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 4 deletions.
10 changes: 8 additions & 2 deletions falcon_data_replicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,9 +156,12 @@ def handle_file(path, key, target_bkt, file_object=None, log_util: logging.Logge
def download_message_files(msg, s3ta, s3or, log: logging.Logger):
"""Download the file specified in the SQS message and trigger file handling."""
# Construct output path for this message's files
msg_output_path = os.path.join(FDR.output_path, msg['pathPrefix'])
msg_output_path = os.path.realpath(os.path.join(FDR.output_path, msg["pathPrefix"]))
# Only write files to the specified output_path
if os.path.commonpath([FDR.output_path, msg_output_path]) != FDR.output_path:
log.debug(
f"Skipping {msg_output_path} to prevent writes outside of output path: {FDR.output_path}"
)
return
# Ensure directory exists at output path
if not os.path.exists(msg_output_path):
Expand All @@ -175,9 +178,12 @@ def download_message_files(msg, s3ta, s3or, log: logging.Logger):
total_download_time_per_input_file = 0
if not FDR.in_memory_transfer_only:
# Create a local path name for our destination file based off of the S3 path
local_path = os.path.join(FDR.output_path, s3_path)
local_path = os.path.realpath(os.path.join(FDR.output_path, s3_path))
# Only write files to the specified output_path
if os.path.commonpath([FDR.output_path, local_path]) != FDR.output_path:
log.debug(
f"Skipping {local_path} to prevent writes outside of output path: {FDR.output_path}"
)
continue
if not os.path.exists(os.path.dirname(local_path)):
# Handle fdr platform and time partitioned folders
Expand Down
10 changes: 8 additions & 2 deletions standalone/falcon_data_replicator.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,12 @@ def download_message_files(msg):
move it to our output_path, and then call handle_file.
"""
# Construct output path for this message's files
msg_output_path = os.path.join(FDR.output_path, msg['pathPrefix'])
msg_output_path = os.path.realpath(os.path.join(FDR.output_path, msg["pathPrefix"]))
# Only write files to the specified output_path
if os.path.commonpath([FDR.output_path, msg_output_path]) != FDR.output_path:
logger.info(
f"Skipping {msg_output_path} to prevent writes outside of output path: {FDR.output_path}"
)
return
# Ensure directory exists at output path
if not os.path.exists(msg_output_path):
Expand All @@ -171,9 +174,12 @@ def download_message_files(msg):
s3_path = s3_file['path']
if not FDR.in_memory_transfer_only:
# Create a local path name for our destination file based off of the S3 path
local_path = os.path.join(FDR.output_path, s3_path)
local_path = os.path.realpath(os.path.join(FDR.output_path, s3_path))
# Only write files to the specified output_path
if os.path.commonpath([FDR.output_path, local_path]) != FDR.output_path:
logger.info(
f"Skipping {local_path} to prevent writes outside of output path: {FDR.output_path}"
)
continue
# Open our local file for binary write
with open(local_path, 'wb') as data:
Expand Down

0 comments on commit 97acc81

Please sign in to comment.