Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multiple tiling adjustments #27

Merged
merged 10 commits into from
Apr 17, 2023
Merged
110 changes: 46 additions & 64 deletions src/alpineer/load_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,34 +344,25 @@ def load_imgs_from_dir(


def check_fov_name_prefix(fov_list):
"""Checks for a prefix (usually detailing a run name) in any of the provided FOV names
"""Checks for a prefix (usually detailing a run/tile name) in any of the provided FOV names

Args:
fov_list (list): list of fov names
Returns:
tuple: (bool) whether at least one fov names has a prefix,
(list / dict) if prefix, dictionary with fov name as keys and prefixes as values
otherwise return a simple list of the fov names
(dict): dictionary with prefixes as the keys and lists of fov names as values
"""

# check for prefix in any of the fov names
prefix = False
# dict containing fov name and run name
fov_names = {}
for folder in fov_list:
if re.search("R.{1,3}C.{1,3}", folder).start() != 0:
prefix = True

if prefix:
# dict containing fov name and run name
fov_names = {}
for folder in fov_list:
fov = "".join(folder.split("_")[-1:])
prefix_name = "_".join(folder.split("_")[:-1])
fov_names[fov] = prefix_name
else:
# original list of fov names
fov_names = fov_list
fov = "".join(folder.split("_")[-1:])
prefix_name = "_".join(folder.split("_")[:-1])
if prefix_name in fov_names.keys():
fov_names[prefix_name].append(fov)
else:
fov_names[prefix_name] = [fov]

return prefix, fov_names
return fov_names


def get_tiled_fov_names(fov_list, return_dims=False):
Expand All @@ -383,37 +374,44 @@ def get_tiled_fov_names(fov_list, return_dims=False):
return_dims (bool):
whether to also return row and col dimensions
Returns:
tuple: names of all fovs expected for tiled image shape, and dimensions if return_dims
list: one entry per fov prefix; if return_dims, a tuple of the fov prefix, all fovs expected
for tiled image shape, row_num and col_num, otherwise only the list of expected fovs
"""

rows, cols, expected_fovs = [], [], []

# check for run name prefix
prefix, fov_names = check_fov_name_prefix(fov_list)
search_term: re.Pattern = re.compile(r"(R\+?\d+)(C\+?\d+)")

# get tiled image dimensions
for fov in fov_names:
R, C = re.search(search_term, fov).group(1, 2)
rows.append(int(R[1:]))
cols.append(int(C[1:]))

row_num, col_num = max(rows), max(cols)

# fill list of expected fov names
for n in range(row_num):
for m in range(col_num):
fov = f"R{n + 1}C{m + 1}"
# prepend run names
if prefix and fov in list(fov_names.keys()):
expected_fovs.append(f"{fov_names[fov]}_" + fov)
else:
expected_fovs.append(fov)
expected_tiles = []

# check for run name prefixes
tiled_fov_names = check_fov_name_prefix(fov_list)
prefixes = tiled_fov_names.keys()
search_term: re.Pattern = re.compile(r"R\+?(\d+)C\+?(\d+)")

# get expected names for each tile
for tile in prefixes:
rows, cols, expected_fovs = [], [], []
fov_names = tiled_fov_names[tile]
# get tiled image dimensions
for fov in fov_names:
R, C = re.search(search_term, fov).group(1, 2)
rows.append(int(R))
cols.append(int(C))
row_num, col_num = max(rows), max(cols)

# fill list of expected fov names
for n in range(row_num):
for m in range(col_num):
fov = f"R{n + 1}C{m + 1}"
# prepend run names
if tile == "":
expected_fovs.append(fov)
else:
expected_fovs.append(f"{tile}_" + fov)

if return_dims:
expected_tiles.append((tile, expected_fovs, row_num, col_num))
else:
expected_tiles.append(expected_fovs)

if return_dims:
return expected_fovs, row_num, col_num
else:
return expected_fovs
return expected_tiles


def load_tiled_img_data(
Expand Down Expand Up @@ -452,22 +450,6 @@ def load_tiled_img_data(
fov_list = fovs
tiled_names = []

# no missing fov images, load data normally and return array
if len(fov_list) == len(expected_fovs):
if single_dir:
img_xr = load_imgs_from_dir(
data_dir,
match_substring=channel,
xr_dim_name="channels",
trim_suffix="_" + channel,
xr_channel_names=[channel],
)
else:
img_xr = load_imgs_from_tree(
data_dir, img_sub_folder, fovs=fov_list, channels=[channel]
)
return img_xr

# missing fov directories, read in a test image to get data type
if single_dir:
test_path = os.path.join(data_dir, expected_fovs[0] + "_" + channel + "." + file_ext)
Expand Down
162 changes: 90 additions & 72 deletions tests/load_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,67 +355,76 @@ def test_load_imgs_from_dir():

def test_check_fov_name_prefix():
# check no prefix
prefix, fovs = load_utils.check_fov_name_prefix(["R1C1", "R1C2", "R1C3"])
assert prefix is False and fovs == ["R1C1", "R1C2", "R1C3"]
fovs = load_utils.check_fov_name_prefix(["R1C1", "R1C2", "R1C3"])
assert fovs == {"": ["R1C1", "R1C2", "R1C3"]}

# check all fovs have prefix
prefix, fovs = load_utils.check_fov_name_prefix(["Run_1_R1C1", "Run_2_R1C2", "Run_1_R1C3"])
assert prefix is True and fovs == {"R1C1": "Run_1", "R1C2": "Run_2", "R1C3": "Run_1"}
fovs = load_utils.check_fov_name_prefix(["Run_1_R1C1", "Run_2_R1C2", "Run_1_R1C3"])
assert fovs == {"Run_1": ["R1C1", "R1C3"], "Run_2": ["R1C2"]}

# check some fovs have prefix
prefix, fovs = load_utils.check_fov_name_prefix(["R1C1", "R1C2", "run1_R1C3"])
assert prefix is True and fovs == {"R1C1": "", "R1C2": "", "R1C3": "run1"}
fovs = load_utils.check_fov_name_prefix(["R1C1", "R1C2", "Tile1_R1C3"])
assert fovs == {"": ["R1C1", "R1C2"], "Tile1": ["R1C3"]}


def test_get_tiled_fov_names():
# check no missing fovs, should return a list with all fovs for a 3x4 tiling
# SINGLE TILE
# check no missing fovs and no prefix, should return a list with all fovs for a 2x2 tiling
fov_names = ["R1C1", "R1C2", "R2C1", "R2C2"]

expected_fovs = load_utils.get_tiled_fov_names(fov_names, return_dims=False)
tiles = load_utils.get_tiled_fov_names(fov_names, return_dims=True)
prefix, expected_fovs, rows, cols = tiles[0]
assert prefix == ""
assert expected_fovs == ["R1C1", "R1C2", "R2C1", "R2C2"]
assert (rows, cols) == (2, 2)

# check no missing fovs and run name attached, should return a list for 1x3 tiling
fov_names = ["Run_10_R1C1", "Run_10_R1C2", "Run_20_R1C3"]
fov_names = ["Run_10_R1C1", "Run_10_R1C2", "Run_10_R1C3"]

expected_fovs, rows, cols = load_utils.get_tiled_fov_names(fov_names, return_dims=True)
assert expected_fovs == ["Run_10_R1C1", "Run_10_R1C2", "Run_20_R1C3"]
tiles = load_utils.get_tiled_fov_names(fov_names, return_dims=True)
prefix, expected_fovs, rows, cols = tiles[0]
assert prefix == "Run_10"
assert expected_fovs == ["Run_10_R1C1", "Run_10_R1C2", "Run_10_R1C3"]
assert (rows, cols) == (1, 3)

# check missing fovs, should return a list with all fovs for a 3x4 tiling
fov_names = ["R1C1", "R1C2", "R2C1", "R2C4", "R3C1"]

expected_fovs, rows, cols = load_utils.get_tiled_fov_names(fov_names, return_dims=True)
assert expected_fovs == [
"R1C1",
"R1C2",
"R1C3",
"R1C4",
"R2C1",
"R2C2",
"R2C3",
"R2C4",
"R3C1",
"R3C2",
"R3C3",
"R3C4",
]
tiles = load_utils.get_tiled_fov_names(fov_names, return_dims=True)
prefix, expected_fovs, rows, cols = tiles[0]
assert prefix == ""
assert expected_fovs == ns.natsorted([f"R{n}C{m}" for n in range(1, 4) for m in range(1, 5)])
assert (rows, cols) == (3, 4)

# check missing fovs with run name attached, should return a list with all fovs for 1x3 tiling
fov_names = ["Run_10_R1C1", "Run_20_R1C3"]

expected_fovs, rows, cols = load_utils.get_tiled_fov_names(fov_names, return_dims=True)
assert expected_fovs == ["Run_10_R1C1", "R1C2", "Run_20_R1C3"]
assert (rows, cols) == (1, 3)

# Check that indices larger than 9 are handled appropriately.
fov_names = ["R1C1", "R10C1", "R2C12"]
expected_fovs, rows, cols = load_utils.get_tiled_fov_names(fov_names, return_dims=True)

assert ns.natsorted([f"R{n}C{m}" for n in range(1, 11) for m in range(1, 13)]) == expected_fovs

tiles = load_utils.get_tiled_fov_names(fov_names, return_dims=True)
prefix, expected_fovs, rows, cols = tiles[0]
assert prefix == ""
assert expected_fovs == ns.natsorted([f"R{n}C{m}" for n in range(1, 11) for m in range(1, 13)])
assert (rows, cols) == (10, 12)

# MULTIPLE TILES
for tile_names in [("Tile_1_", "Tile_2_"), ("", "named_tile_")]:
prefix_1, prefix_2 = tile_names
fov_names = [f"{prefix_1}R6C9", f"{prefix_2}R3C5"]
tiles = load_utils.get_tiled_fov_names(fov_names, return_dims=True)

# check first tile for 6x9
prefix, expected_fovs, rows, cols = tiles[0]
assert prefix == prefix_1[:-1] and (rows, cols) == (6, 9)
assert expected_fovs == ns.natsorted(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

natsort is such an amazing module.

[f"{prefix_1}R{n}C{m}" for n in range(1, 7) for m in range(1, 10)]
)

# check second tile for 3x5
prefix, expected_fovs, rows, cols = tiles[1]
assert prefix == prefix_2[:-1] and (rows, cols) == (3, 5)
assert expected_fovs == ns.natsorted(
[f"{prefix_2}R{n}C{m}" for n in range(1, 4) for m in range(1, 6)]
)


@pytest.mark.parametrize("single_dir, img_sub_folder", [(False, "TIFs"), (True, "")])
def test_load_tiled_img_data(single_dir, img_sub_folder):
Expand Down Expand Up @@ -460,7 +469,7 @@ def test_load_tiled_img_data(single_dir, img_sub_folder):
if not single_dir:
toffy_fovs = {"R1C1": "fov-1", "R1C2": "fov-3", "R1C3": "fov-2"}
fovs = list(toffy_fovs.values())
expected_fovs = load_utils.get_tiled_fov_names(list(toffy_fovs.keys()))
expected_fovs = load_utils.get_tiled_fov_names(list(toffy_fovs.keys()))[0]

filelocs, data_xr = test_utils.create_paired_xarray_fovs(
temp_dir,
Expand All @@ -483,6 +492,8 @@ def test_load_tiled_img_data(single_dir, img_sub_folder):
img_sub_folder=img_sub_folder,
)

# change data_xr fov names to match toffy names
data_xr.coords["fovs"] = loaded_xr["fovs"]
assert loaded_xr.equals(data_xr[:, :, :, :-1])
assert loaded_xr.shape == (3, 10, 10, 1)

Expand Down Expand Up @@ -519,42 +530,49 @@ def test_load_tiled_img_data(single_dir, img_sub_folder):
assert loaded_xr.equals(data_xr[:, :, :, :-1])
assert loaded_xr.shape == (4, 10, 10, 1)

# check toffy dict loading
# check toffy dict loading with and without prefix
if not single_dir:
toffy_fovs = {"R1C1": "fov-3", "R1C2": "fov-1", "R2C1": "fov-4", "R2C2": "fov-2"}
fovs = list(toffy_fovs.values())
expected_fovs = load_utils.get_tiled_fov_names(list(toffy_fovs.keys()))

filelocs, data_xr = test_utils.create_paired_xarray_fovs(
temp_dir,
fovs,
["chan1", "chan2"],
img_shape=(10, 10),
delimiter="_",
fills=True,
sub_dir=img_sub_folder,
dtype="int16",
single_dir=single_dir,
)
data_xr["fovs"] = list(toffy_fovs.keys())

# remove images and expected data for one fov
data_xr[2, :, :, :] = np.zeros((10, 10, 1), dtype="int16")
shutil.rmtree(os.path.join(temp_dir, "fov-4"))
toffy_fovs.pop("R2C1")

# check successful loading for one channel
loaded_xr = load_utils.load_tiled_img_data(
temp_dir,
toffy_fovs,
expected_fovs,
"chan1",
single_dir=single_dir,
img_sub_folder=img_sub_folder,
)

assert loaded_xr.equals(data_xr[:, :, :, :-1])
assert loaded_xr.shape == (4, 10, 10, 1)
for prefix in ["Tile_1_", ""]:
with tempfile.TemporaryDirectory() as temp_dir2:
toffy_fovs = {
f"{prefix}R1C1": "fov-3",
f"{prefix}R1C2": "fov-1",
f"{prefix}R2C1": "fov-4",
f"{prefix}R2C2": "fov-2",
}
fovs = list(toffy_fovs.values())
expected_fovs = load_utils.get_tiled_fov_names(list(toffy_fovs.keys()))[0]

filelocs, data_xr = test_utils.create_paired_xarray_fovs(
temp_dir2,
fovs,
["chan1", "chan2"],
img_shape=(10, 10),
delimiter="_",
fills=True,
sub_dir=img_sub_folder,
dtype="int16",
single_dir=single_dir,
)
data_xr["fovs"] = list(toffy_fovs.keys())

# remove images and expected data for one fov
data_xr[2, :, :, :] = np.zeros((10, 10, 1), dtype="int16")
shutil.rmtree(os.path.join(temp_dir2, "fov-4"))
toffy_fovs.pop(list(toffy_fovs.keys())[2])

# check successful loading for one channel
loaded_xr = load_utils.load_tiled_img_data(
temp_dir2,
toffy_fovs,
expected_fovs,
"chan1",
single_dir=single_dir,
img_sub_folder=img_sub_folder,
)

assert loaded_xr.equals(data_xr[:, :, :, :-1])
assert loaded_xr.shape == (4, 10, 10, 1)

# test loading with data_xr containing float values
with tempfile.TemporaryDirectory() as temp_dir:
Expand Down