Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,13 @@
<label>Train model</label>
<default>true</default>
</boolean>
<!-- Boolean CLI parameter: name useCuda, long flag usecuda, default true. -->
<!-- NOTE(review): the default requests CUDA; confirm the behavior on hosts without a GPU. -->
<boolean>
<name>useCuda</name>
<longflag>usecuda</longflag>
<description>Whether or not to use GPU/cuda (true) or cpu (false).</description>
<label>Use CUDA</label>
<default>true</default>
</boolean>
<integer>
<name>batchSize</name>
<longflag>batchsize</longflag>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ def trainModelAddItem(self, gc, record, item, annotrec, elem, feature,

def trainModel(self, gc, folderId, annotationName, features, modelFolderId,
batchSize, epochs, trainingSplit, randomInput, labelList,
excludeLabelList, prog):
excludeLabelList, use_cuda, prog):
itemsAndAnnot = self.getItemsAndAnnotations(gc, folderId, annotationName)
with tempfile.TemporaryDirectory(dir=os.getcwd()) as tempdir:
trainingPath = os.path.join(tempdir, 'training.h5')
Expand Down Expand Up @@ -544,7 +544,7 @@ def trainModel(self, gc, folderId, annotationName, features, modelFolderId,
prog.progress(0)
history, modelPath = self.trainModelDetails(
record, annotationName, batchSize, epochs, itemsAndAnnot, prog, tempdir,
trainingSplit)
trainingSplit, use_cuda)

modTrainingPath = os.path.join(tempdir, '%s ModTraining Epoch %d.h5' % (
annotationName, self.getCurrentEpoch(itemsAndAnnot)))
Expand All @@ -568,7 +568,7 @@ def trainModel(self, gc, folderId, annotationName, features, modelFolderId,

def predictLabelsForItem(self, gc, annotationName, annotationFolderId, tempdir, model, item,
annotrec, elem, feature, curEpoch, userId, labels, groups,
makeHeatmaps, radius, magnification, certainty, batchSize, prog):
makeHeatmaps, radius, magnification, certainty, batchSize, use_cuda, prog):
import al_bench.factory

print('Predicting %s' % (item['name']))
Expand Down Expand Up @@ -771,7 +771,7 @@ def makeHeatmapsForItem(self, gc, annotationName, userId, tempdir, radius, item,

def predictLabels(self, gc, folderId, annotationName, features, modelFolderId,
annotationFolderId, saliencyMaps, radius, magnification,
certainty, batchSize, prog):
certainty, batchSize, use_cuda, prog):
itemsAndAnnot = self.getItemsAndAnnotations(gc, folderId, annotationName)
curEpoch = self.getCurrentEpoch(itemsAndAnnot)
folder = gc.getFolder(folderId)
Expand Down Expand Up @@ -833,7 +833,7 @@ def predictLabels(self, gc, folderId, annotationName, features, modelFolderId,
self.predictLabelsForItem(
gc, annotationName, annotationFolderId, tempdir, model, item, annotrec, elem,
features.get(item['_id']), curEpoch, userId, labels, groups, saliencyMaps,
radius, magnification, certainty, batchSize, prog)
radius, magnification, certainty, batchSize, use_cuda, prog)
prog.progress(1)

def main(self, args):
Expand Down Expand Up @@ -864,5 +864,5 @@ def main(self, args):

self.predictLabels(
gc, args.images, args.annotationName, features, args.modeldir, args.annotationDir,
args.heatmaps, args.radius, args.magnification, args.certainty, args.batchSize,
args.heatmaps, args.radius, args.magnification, args.certainty, args.batchSize, args.useCuda,
prog)
Original file line number Diff line number Diff line change
Expand Up @@ -35,33 +35,56 @@ class SuperpixelClassificationTensorflow(SuperpixelClassificationBase):
def __init__(self):
    """Start with CUDA disabled and no known optimal batch sizes."""
    # CUDA stays off until a caller supplies its own use_cuda value.
    self.use_cuda = False
    # None means "no optimal batch size recorded yet" for each mode.
    self.prediction_optimal_batchsize: Optional[int] = None
    self.training_optimal_batchsize: Optional[int] = None

def trainModelDetails(self, record, annotationName, batchSize, epochs, itemsAndAnnot, prog,
tempdir, trainingSplit):
# print(f'Tensorflow trainModelDetails(batchSize={batchSize}, ...)')
# make model
num_classes = len(record['labels'])
model = tf.keras.Sequential([
tf.keras.layers.Rescaling(1.0 / 255),
tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu'),
tf.keras.layers.MaxPooling2D(),
tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu'),
tf.keras.layers.MaxPooling2D(),
tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
tf.keras.layers.MaxPooling2D(),
tf.keras.layers.Flatten(),
# tf.keras.layers.Dropout(0.2),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Dense(num_classes)])
prog.progress(0.2)
model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])
tempdir, trainingSplit, use_cuda):
self.use_cuda = use_cuda

# Enable GPU memory growth globally to avoid precondition errors
gpus = tf.config.list_physical_devices('GPU')
if gpus and self.use_cuda:
try:
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
print(f"Could not set memory growth: {e}")
if not self.use_cuda:
tf.config.set_visible_devices([], 'GPU')
device = "gpu" if use_cuda else "cpu"
print(f"Using device: {device}")

# Dataset preparation (outside strategy scope)
ds_h5 = record['ds']
labelds_h5 = record['labelds']
# Fully load to memory and break h5py reference
ds_numpy = np.array(ds_h5[:])
labelds_numpy = np.array(labelds_h5[:])

strategy = tf.distribute.MirroredStrategy()
with strategy.scope():
num_classes = len(record['labels'])
model = tf.keras.Sequential([
tf.keras.layers.Rescaling(1.0 / 255),
tf.keras.layers.Conv2D(16, 3, padding='same', activation='relu'),
tf.keras.layers.MaxPooling2D(),
tf.keras.layers.Conv2D(32, 3, padding='same', activation='relu'),
tf.keras.layers.MaxPooling2D(),
tf.keras.layers.Conv2D(64, 3, padding='same', activation='relu'),
tf.keras.layers.MaxPooling2D(),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(128, activation='relu'),
tf.keras.layers.Dense(num_classes)])
prog.progress(0.2)
model.compile(optimizer='adam',
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=['accuracy'])

prog.progress(0.7)
# generate split
full_ds = tf.data.Dataset.from_tensor_slices((record['ds'], record['labelds']))
full_ds = full_ds.shuffle(1000) # add seed=123 ?
count = len(full_ds)
# generate split using numpy arrays
full_ds = tf.data.Dataset.from_tensor_slices((ds_numpy, labelds_numpy))
full_ds = full_ds.shuffle(1000)
count = len(ds_numpy)
train_size = int(count * trainingSplit)
if batchSize < 1:
batchSize = self.findOptimalBatchSize(model, full_ds, training=True)
Expand All @@ -85,24 +108,53 @@ def trainModelDetails(self, record, annotationName, batchSize, epochs, itemsAndA
self.saveModel(model, modelPath)
return history, modelPath

def _get_device(self, use_cuda):
    """Return the TensorFlow device string to run on.

    Args:
        use_cuda: truthy to request GPU execution.

    Returns:
        '/GPU:0' when ``use_cuda`` is truthy and TensorFlow reports at least
        one physical GPU; '/CPU:0' otherwise.
    """
    # Test the caller's flag first so the physical-device query is skipped
    # entirely when CUDA was not requested.
    if use_cuda and tf.config.list_physical_devices('GPU'):
        return '/GPU:0'
    return '/CPU:0'

def predictLabelsForItemDetails(
self, batchSize, ds: h5py._hl.dataset.Dataset, item, model, prog,
self, batchSize, ds: h5py._hl.dataset.Dataset, indices, item, model, use_cuda, prog,
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The `indices` parameter is for the cutoff PR.

):
# print(f'Tensorflow predictLabelsForItemDetails(batchSize={batchSize}, ...)')
if batchSize < 1:
batchSize = self.findOptimalBatchSize(
model, tf.data.Dataset.from_tensor_slices(ds), training=False,
)
print(f'Optimal batch size for prediction = {batchSize}')
predictions = model.predict(
ds,
batch_size=batchSize,
callbacks=[_LogTensorflowProgress(
prog, (ds.shape[0] + batchSize - 1) // batchSize, 0.05, 0.35, item)])
prog.item_progress(item, 0.4)
# softmax to scale to 0 to 1
catWeights = tf.nn.softmax(predictions)
return catWeights, predictions

device = self._get_device(use_cuda)
with tf.device(device):
# Create a dataset that pairs the data with their indices
dataset = tf.data.Dataset.from_tensor_slices((ds, indices))
dataset = dataset.batch(batchSize)

# Initialize arrays to store results
all_predictions = []
all_cat_weights = []
all_indices = []

# Iterate through batches manually to keep track of indices
for data, batch_indices in dataset:
batch_predictions = model.predict(
data,
batch_size=batchSize,
verbose=0) # Set verbose=0 to avoid multiple progress bars

# Apply softmax to scale to 0 to 1
batch_cat_weights = tf.nn.softmax(batch_predictions)

all_predictions.append(batch_predictions)
all_cat_weights.append(batch_cat_weights)
all_indices.append(batch_indices)

prog.item_progress(item, 0.4)

# Concatenate all results
predictions = tf.concat(all_predictions, axis=0)
catWeights = tf.concat(all_cat_weights, axis=0)
final_indices = tf.concat(all_indices, axis=0)

return catWeights.numpy(), predictions.numpy(), final_indices.numpy().astype(np.int64)

def findOptimalBatchSize(self, model, ds, training) -> int:
if training and self.training_optimal_batchsize is not None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,12 +66,10 @@ class _BayesianPatchTorchModel(bbald.consistent_mc_dropout.BayesianModule):
# A Bayesian model that takes patches (2-dimensional shape) rather than vectors
# (1-dimensional shape) as input. It is useful when feature != 'vector' and
# SuperpixelClassificationBase.certainty == 'batchbald'.
def __init__(self, num_classes: int) -> None:
def __init__(self, num_classes: int, device: torch.device) -> None:
# Set `self.device` as early as possible so that other code does not lock out
# what we want.
self.device: str = torch.device(
('cuda' if torch.cuda.is_available() and torch.cuda.device_count() > 0 else 'cpu'),
)
self.device : torch.device = device
# print(f'Initial model.device = {self.device}')
super(_BayesianPatchTorchModel, self).__init__()

Expand Down Expand Up @@ -311,7 +309,10 @@ def trainModelDetails(
prog: ProgressHelper,
tempdir: str,
trainingSplit: float,
cuda : bool,
):
device = torch.device("cuda" if cuda else "cpu")
print(f"Using device: {device}")
# make model
num_classes: int = len(record['labels'])
model: torch.nn.Module
Expand Down Expand Up @@ -507,7 +508,7 @@ def fitModel(
return history

def predictLabelsForItemDetails(
self, batchSize: int, ds_h5, item, model: torch.nn.Module, prog: ProgressHelper,
self, batchSize: int, ds_h5, item, model: torch.nn.Module, use_cuda : bool, prog: ProgressHelper,
):
# print(f'Torch predictLabelsForItemDetails(batchSize={batchSize}, ...)')
num_superpixels: int = ds_h5.shape[0]
Expand All @@ -528,6 +529,9 @@ def predictLabelsForItemDetails(
)
if self.certainty == 'batchbald'
else dict(num_superpixels=num_superpixels, num_classes=num_classes)
# also set on model.device, ideally
#device = torch.device("cuda" if use_cuda else "cpu")

)
for cb in callbacks:
cb.on_predict_begin(logs=logs)
Expand Down